commit 83bae01182
Merge branch 'timers/urgent' into timers/core, to pick up fix

Signed-off-by: Ingo Molnar <mingo@kernel.org>

 .mailmap | 4
@@ -108,6 +108,10 @@ Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
<javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
Jayachandran C <c.jayachandran@gmail.com> <jayachandranc@netlogicmicro.com>
Jayachandran C <c.jayachandran@gmail.com> <jchandra@broadcom.com>
Jayachandran C <c.jayachandran@gmail.com> <jchandra@digeo.com>
Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com>
Jean Tourrilhes <jt@hpl.hp.com>
<jean-philippe@linaro.org> <jean-philippe.brucker@arm.com>
Jeff Garzik <jgarzik@pretzel.yyz.us>
@@ -486,6 +486,8 @@ What:		/sys/devices/system/cpu/vulnerabilities
		/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
		/sys/devices/system/cpu/vulnerabilities/l1tf
		/sys/devices/system/cpu/vulnerabilities/mds
		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
		/sys/devices/system/cpu/vulnerabilities/itlb_multihit
Date:		January 2018
Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:	Information about CPU vulnerabilities
@@ -12,3 +12,5 @@ are configurable at compile, boot or run time.
   spectre
   l1tf
   mds
   tsx_async_abort
   multihit.rst

 Documentation/admin-guide/hw-vuln/multihit.rst | 163 (new file)
@@ -0,0 +1,163 @@
iTLB multihit
=============

iTLB multihit is an erratum where some processors may incur a machine check
error, possibly resulting in an unrecoverable CPU lockup, when an
instruction fetch hits multiple entries in the instruction TLB. This can
occur when the page size is changed along with either the physical address
or cache type. A malicious guest running on a virtualized system can
exploit this erratum to perform a denial of service attack.


Affected processors
-------------------

Variations of this erratum are present on most Intel Core and Xeon processor
models. The erratum is not present on:

   - non-Intel processors

   - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)

   - Intel processors that have the PSCHANGE_MC_NO bit set in the
     IA32_ARCH_CAPABILITIES MSR.


Related CVEs
------------

The following CVE entry is related to this issue:

   ==============  =================================================
   CVE-2018-12207  Machine Check Error Avoidance on Page Size Change
   ==============  =================================================


Problem
-------

Privileged software, including the OS and virtual machine managers (VMM), is in
charge of memory management. A key component in memory management is the control
of the page tables. Modern processors use virtual memory, a technique that creates
the illusion of a very large memory for processors. This virtual space is split
into pages of a given size. Page tables translate virtual addresses to physical
addresses.

To reduce latency when performing a virtual to physical address translation,
processors include a structure, called TLB, that caches recent translations.
There are separate TLBs for instruction (iTLB) and data (dTLB).

Under this erratum, instructions are fetched from a linear address translated
using a 4 KB translation cached in the iTLB. Privileged software then modifies the
paging structure so that the same linear address is mapped using a large page size
(2 MB, 4 MB, 1 GB) with a different physical address or memory type.  After the page
structure modification but before the software invalidates any iTLB entries for
the linear address, a code fetch that happens on the same linear address may
cause a machine-check error which can result in a system hang or shutdown.


Attack scenarios
----------------

Attacks against the iTLB multihit erratum can be mounted from malicious
guests in a virtualized system.


iTLB multihit system information
--------------------------------

The Linux kernel provides a sysfs interface to enumerate the current iTLB
multihit status of the system: whether the system is vulnerable and which
mitigations are active. The relevant sysfs file is:

/sys/devices/system/cpu/vulnerabilities/itlb_multihit

The possible values in this file are:

.. list-table::

     * - Not affected
       - The processor is not vulnerable.
     * - KVM: Mitigation: Split huge pages
       - Software changes mitigate this issue.
     * - KVM: Vulnerable
       - The processor is vulnerable, but no mitigation is enabled.
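
The file can be read like any ordinary sysfs attribute. As a minimal, hedged
sketch (an editorial illustration, not part of the kernel documentation), the
C program below walks the vulnerabilities directory and prints what the
running kernel reports for itlb_multihit and its siblings; the only
assumption is a kernel recent enough to expose these files::

   /* Sketch: print what the kernel reports for every CPU vulnerability file,
    * including itlb_multihit. Assumes the sysfs directory exists.
    */
   #include <dirent.h>
   #include <stdio.h>

   int main(void)
   {
   	const char *base = "/sys/devices/system/cpu/vulnerabilities";
   	char path[512], line[256];
   	struct dirent *de;
   	DIR *d = opendir(base);

   	if (!d) {
   		perror(base);		/* directory absent on old kernels */
   		return 1;
   	}
   	while ((de = readdir(d)) != NULL) {
   		if (de->d_name[0] == '.')
   			continue;
   		snprintf(path, sizeof(path), "%s/%s", base, de->d_name);
   		FILE *f = fopen(path, "r");
   		if (!f)
   			continue;
   		if (fgets(line, sizeof(line), f))
   			printf("%-18s %s", de->d_name, line);
   		fclose(f);
   	}
   	closedir(d);
   	return 0;
   }
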
Enumeration of the erratum
--------------------------

A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) MSR
and will be set on CPUs which are mitigated against this issue.

   =======================================   ===========   ================================
   IA32_ARCH_CAPABILITIES MSR                Not present   Possibly vulnerable, check model
   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]    '0'           Likely vulnerable, check model
   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]    '1'           Not vulnerable
   =======================================   ===========   ================================


Mitigation mechanism
--------------------

This erratum can be mitigated by restricting the use of large page sizes to
non-executable pages.  This forces all iTLB entries to be 4K, and removes
the possibility of multiple hits.

In order to mitigate the vulnerability, KVM initially marks all huge pages
as non-executable. If the guest attempts to execute in one of those pages,
the page is broken down into 4K pages, which are then marked executable.

If EPT is disabled or not available on the host, KVM is in control of TLB
flushes and the problematic situation cannot happen.  However, the shadow
EPT paging mechanism used by nested virtualization is vulnerable, because
the nested guest can trigger multiple iTLB hits by modifying its own
(non-nested) page tables.  For simplicity, KVM will make large pages
non-executable in all shadow paging modes.

Mitigation control on the kernel command line and KVM - module parameter
-------------------------------------------------------------------------

The KVM hypervisor mitigation mechanism for marking huge pages as
non-executable can be controlled with the module parameter "nx_huge_pages=".
The kernel command line allows controlling the iTLB multihit mitigations at
boot time with the option "kvm.nx_huge_pages=".

The valid arguments for these options are:

  ==========  ================================================================
  force       Mitigation is enabled. In this case, the mitigation implements
              non-executable huge pages in the Linux kernel KVM module. All huge
              pages in the EPT are marked as non-executable.
              If a guest attempts to execute in one of those pages, the page is
              broken down into 4K pages, which are then marked executable.

  off         Mitigation is disabled.

  auto        Enable mitigation only if the platform is affected and the kernel
              was not booted with the "mitigations=off" command line parameter.
              This is the default option.
  ==========  ================================================================

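
Since nx_huge_pages is an ordinary KVM module parameter, its current setting
can also be inspected at runtime. The sketch below is an editorial
illustration only: the /sys/module/kvm/parameters/ path and its readability
are assumptions about how module parameters are usually exposed, not
something this document defines::

   /* Sketch: print the current kvm.nx_huge_pages setting.
    * Assumptions: the kvm module is loaded and the parameter is exposed
    * under /sys/module/kvm/parameters/ as module parameters normally are.
    */
   #include <stdio.h>

   int main(void)
   {
   	const char *param = "/sys/module/kvm/parameters/nx_huge_pages";
   	char value[64];
   	FILE *f = fopen(param, "r");

   	if (!f) {
   		perror(param);
   		return 1;
   	}
   	if (fgets(value, sizeof(value), f))
   		printf("kvm.nx_huge_pages = %s", value);
   	fclose(f);
   	return 0;
   }
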
Mitigation selection guide
--------------------------

1. No virtualization in use
^^^^^^^^^^^^^^^^^^^^^^^^^^^

   The system is protected by the kernel unconditionally and no further
   action is required.

2. Virtualization with trusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

   If the guest comes from a trusted source, you may assume that the guest will
   not attempt to maliciously exploit this erratum and no further action is
   required.

3. Virtualization with untrusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

   If the guest comes from an untrusted source, the host kernel will need
   to apply the iTLB multihit mitigation via the kernel command line or the kvm
   module parameter.

 Documentation/admin-guide/hw-vuln/tsx_async_abort.rst | 276 (new file)
@@ -0,0 +1,276 @@
.. SPDX-License-Identifier: GPL-2.0

TAA - TSX Asynchronous Abort
============================

TAA is a hardware vulnerability that allows unprivileged speculative access to
data which is available in various CPU internal buffers by using asynchronous
aborts within an Intel TSX transactional region.

Affected processors
-------------------

This vulnerability only affects Intel processors that support Intel
Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
is 0 in the IA32_ARCH_CAPABILITIES MSR.  On processors where the MDS_NO bit
(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
also mitigate against TAA.

Whether a processor is affected or not can be read out from the TAA
vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
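
The two architecture capability bits named above can also be read directly
from user space through the msr driver. The following is only an editorial
sketch: it assumes the msr module is loaded (so /dev/cpu/0/msr exists), root
privileges, and the conventional IA32_ARCH_CAPABILITIES address of 0x10a; the
sysfs file remains the supported interface::

   /* Sketch: check TAA_NO (bit 8) and MDS_NO (bit 5) in IA32_ARCH_CAPABILITIES.
    * Assumptions: 'msr' driver loaded, run as root, MSR address 0x10a.
    */
   #include <fcntl.h>
   #include <stdint.h>
   #include <stdio.h>
   #include <unistd.h>

   #define IA32_ARCH_CAPABILITIES 0x10a	/* conventional address, assumed */

   int main(void)
   {
   	uint64_t val;
   	int fd = open("/dev/cpu/0/msr", O_RDONLY);

   	if (fd < 0) {
   		perror("open /dev/cpu/0/msr");
   		return 1;
   	}
   	if (pread(fd, &val, sizeof(val), IA32_ARCH_CAPABILITIES) != sizeof(val)) {
   		perror("read IA32_ARCH_CAPABILITIES");	/* MSR may not exist */
   		close(fd);
   		return 1;
   	}
   	close(fd);
   	printf("TAA_NO (bit 8): %s\n", (val & (1ULL << 8)) ? "set" : "clear");
   	printf("MDS_NO (bit 5): %s\n", (val & (1ULL << 5)) ? "set" : "clear");
   	return 0;
   }
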
Related CVEs
------------

The following CVE entry is related to this TAA issue:

   ==============  =====  ===================================================
   CVE-2019-11135  TAA    TSX Asynchronous Abort (TAA) condition on some
                          microprocessors utilizing speculative execution may
                          allow an authenticated user to potentially enable
                          information disclosure via a side channel with
                          local access.
   ==============  =====  ===================================================

Problem
-------

When performing store, load or L1 refill operations, processors write
data into temporary microarchitectural structures (buffers). The data in
those buffers can be forwarded to load operations as an optimization.

Intel TSX is an extension to the x86 instruction set architecture that adds
hardware transactional memory support to improve performance of multi-threaded
software. TSX lets the processor expose and exploit concurrency hidden in an
application by dynamically avoiding unnecessary synchronization.

TSX supports atomic memory transactions that are either committed (success) or
aborted. During an abort, operations that happened within the transactional region
are rolled back. An asynchronous abort takes place, among other options, when a
different thread accesses a cache line that is also used within the transactional
region when that access might lead to a data race.
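
For readers who have not seen TSX code before, the sketch below shows what a
transactional region looks like when written with the standard RTM
intrinsics. It is purely illustrative: it assumes an RTM-capable CPU and a
compiler invoked with -mrtm, and it has nothing to do with mounting an
attack::

   /* Sketch of a TSX/RTM transactional region with an abort fallback.
    * Assumes RTM support and compilation with -mrtm (GCC or Clang).
    */
   #include <immintrin.h>
   #include <stdio.h>

   static int shared_counter;

   int main(void)
   {
   	unsigned int status = _xbegin();

   	if (status == _XBEGIN_STARTED) {
   		/* Inside the transaction: either this commits atomically... */
   		shared_counter++;
   		_xend();
   	} else {
   		/* ...or the whole region is rolled back on a (possibly
   		 * asynchronous) abort and execution resumes here.
   		 */
   		printf("transaction aborted, status 0x%x\n", status);
   		shared_counter++;	/* non-transactional fallback path */
   	}
   	printf("counter = %d\n", shared_counter);
   	return 0;
   }
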
Immediately after an uncompleted asynchronous abort, certain speculatively
executed loads may read data from those internal buffers and pass it to dependent
operations. This can then be used to infer the value via a cache side channel
attack.

Because the buffers are potentially shared between Hyper-Threads, cross
Hyper-Thread attacks are possible.

The victim of a malicious actor does not need to make use of TSX. Only the
attacker needs to begin a TSX transaction and raise an asynchronous abort
which in turn potentially leaks data stored in the buffers.

More detailed technical information is available in the TAA specific x86
architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.


Attack scenarios
----------------

Attacks against the TAA vulnerability can be implemented from unprivileged
applications running on hosts or guests.

As for MDS, the attacker has no control over the memory addresses that can
be leaked. Only the victim is responsible for bringing data to the CPU. As
a result, the malicious actor has to sample as much data as possible and
then postprocess it to try to infer any useful information from it.

A potential attacker only has read access to the data. Also, there is no direct
privilege escalation by using this technique.


.. _tsx_async_abort_sys_info:

TAA system information
-----------------------

The Linux kernel provides a sysfs interface to enumerate the current TAA status
of mitigated systems. The relevant sysfs file is:

/sys/devices/system/cpu/vulnerabilities/tsx_async_abort

The possible values in this file are:

.. list-table::

   * - 'Vulnerable'
     - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
     - The system tries to clear the buffers but the microcode might not support the operation.
   * - 'Mitigation: Clear CPU buffers'
     - The microcode has been updated to clear the buffers. TSX is still enabled.
   * - 'Mitigation: TSX disabled'
     - TSX is disabled.
   * - 'Not affected'
     - The CPU is not affected by this issue.

.. _ucode_needed:

Best effort mitigation mode
^^^^^^^^^^^^^^^^^^^^^^^^^^^

If the processor is vulnerable, but the availability of the microcode-based
mitigation mechanism is not advertised via CPUID, the kernel selects a best
effort mitigation mode.  This mode invokes the mitigation instructions
without a guarantee that they clear the CPU buffers.

This is done to address virtualization scenarios where the host has the
microcode update applied, but the hypervisor is not yet updated to expose the
CPUID to the guest. If the host has updated microcode the protection takes
effect; otherwise a few CPU cycles are wasted pointlessly.

The state in the tsx_async_abort sysfs file reflects this situation
accordingly.


Mitigation mechanism
--------------------

The kernel detects the affected CPUs and the presence of the microcode which is
required. If a CPU is affected and the microcode is available, then the kernel
enables the mitigation by default.

The mitigation can be controlled at boot time via a kernel command line option.
See :ref:`taa_mitigation_control_command_line`.

.. _virt_mechanism:

Virtualization mitigation
^^^^^^^^^^^^^^^^^^^^^^^^^

Affected systems where the host has the TAA microcode and TAA is mitigated by
having disabled TSX previously are not vulnerable, regardless of the status
of the VMs.

In all other cases, if the host either does not have the TAA microcode or
the kernel is not mitigated, the system might be vulnerable.


.. _taa_mitigation_control_command_line:

Mitigation control on the kernel command line
---------------------------------------------

The kernel command line allows controlling the TAA mitigations at boot time with
the option "tsx_async_abort=". The valid arguments for this option are:

  ============  =============================================================
  off           This option disables the TAA mitigation on affected platforms.
                If the system has TSX enabled (see next parameter) and the CPU
                is affected, the system is vulnerable.

  full          TAA mitigation is enabled. If TSX is enabled, on an affected
                system it will clear CPU buffers on ring transitions. On
                systems which are MDS-affected and deploy MDS mitigation,
                TAA is also mitigated. Specifying this option on those
                systems will have no effect.

  full,nosmt    The same as tsx_async_abort=full, with SMT disabled on
                vulnerable CPUs that have TSX enabled. This is the complete
                mitigation. When TSX is disabled, SMT is not disabled because
                the CPU is not vulnerable to cross-thread TAA attacks.
  ============  =============================================================

Not specifying this option is equivalent to "tsx_async_abort=full".

The kernel command line also allows controlling the TSX feature using the
parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
to control the TSX feature and the enumeration of the TSX feature bits (RTM
and HLE) in CPUID.

The valid options are:

  ============  =============================================================
  off           Disables TSX on the system.

                Note that this option takes effect only on newer CPUs which are
                not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
                and which get the new IA32_TSX_CTRL MSR through a microcode
                update. This new MSR allows for the reliable deactivation of
                the TSX functionality.

  on            Enables TSX.

                Although there are mitigations for all known security
                vulnerabilities, TSX has been known to be an accelerator for
                several previous speculation-related CVEs, and so there may be
                unknown security risks associated with leaving it enabled.

  auto          Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
                on the system.
  ============  =============================================================

Not specifying this option is equivalent to "tsx=off".

The following combinations of the "tsx_async_abort" and "tsx" command line
options are possible. For affected platforms tsx=auto is equivalent to
tsx=off and the result will be:

  =========  ==========================   =========================================
  tsx=on     tsx_async_abort=full         The system will use VERW to clear CPU
                                          buffers. Cross-thread attacks are still
                                          possible on SMT machines.
  tsx=on     tsx_async_abort=full,nosmt   As above, cross-thread attacks on SMT
                                          mitigated.
  tsx=on     tsx_async_abort=off          The system is vulnerable.
  tsx=off    tsx_async_abort=full         TSX might be disabled if microcode
                                          provides a TSX control MSR. If so,
                                          system is not vulnerable.
  tsx=off    tsx_async_abort=full,nosmt   Ditto
  tsx=off    tsx_async_abort=off          Ditto
  =========  ==========================   =========================================

For unaffected platforms "tsx=on" and "tsx_async_abort=full" do not clear CPU
buffers.  For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
the "tsx" command line argument has no effect.

For the affected platforms, the table below indicates the mitigation status for the
combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO
and TSX_CTRL_MSR.

  =======  =========  =============  ========================================
  MDS_NO   MD_CLEAR   TSX_CTRL_MSR   Status
  =======  =========  =============  ========================================
    0          0            0        Vulnerable (needs microcode)
    0          1            0        MDS and TAA mitigated via VERW
    1          1            0        MDS fixed, TAA vulnerable if TSX enabled
                                     because MD_CLEAR has no meaning and
                                     VERW is not guaranteed to clear buffers
    1          X            1        MDS fixed, TAA can be mitigated by
                                     VERW or TSX_CTRL_MSR
  =======  =========  =============  ========================================

Mitigation selection guide
--------------------------

1. Trusted userspace and guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If all user space applications are from a trusted source and do not execute
untrusted code which is supplied externally, then the mitigation can be
disabled. The same applies to virtualized environments with trusted guests.


2. Untrusted userspace and guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If there are untrusted applications or guests on the system, enabling TSX
might allow a malicious actor to leak data from the host or from other
processes running on the same physical core.

If the microcode is available and TSX is disabled on the host, attacks
are prevented in a virtualized environment as well, even if the VMs do not
explicitly enable the mitigation.


.. _taa_default_mitigations:

Default mitigations
-------------------

The kernel's default action for vulnerable processors is:

  - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
@@ -2055,6 +2055,25 @@
			KVM MMU at runtime.
			Default is 0 (off)

	kvm.nx_huge_pages=
			[KVM] Controls the software workaround for the
			X86_BUG_ITLB_MULTIHIT bug.
			force	: Always deploy workaround.
			off	: Never deploy workaround.
			auto    : Deploy workaround based on the presence of
				  X86_BUG_ITLB_MULTIHIT.

			Default is 'auto'.

			If the software workaround is enabled for the host,
			guests need not enable it for nested guests.

	kvm.nx_huge_pages_recovery_ratio=
			[KVM] Controls how many 4KiB pages are periodically zapped
			back to huge pages.  0 disables the recovery, otherwise if
			the value is N, KVM will zap 1/Nth of the 4KiB pages every
			minute.  The default is 60.

	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
			Default is 1 (enabled)
@@ -2636,6 +2655,13 @@
				       ssbd=force-off [ARM64]
				       l1tf=off [X86]
				       mds=off [X86]
				       tsx_async_abort=off [X86]
				       kvm.nx_huge_pages=off [X86]

				Exceptions:
					       This does not have any effect on
					       kvm.nx_huge_pages when
					       kvm.nx_huge_pages=force.

			auto (default)
				Mitigate all CPU vulnerabilities, but leave SMT
@@ -2651,6 +2677,7 @@
				be fully mitigated, even if it means losing SMT.
				Equivalent to: l1tf=flush,nosmt [X86]
					       mds=full,nosmt [X86]
					       tsx_async_abort=full,nosmt [X86]

	mminit_loglevel=
			[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -4848,6 +4875,71 @@
			interruptions from clocksource watchdog are not
			acceptable).

	tsx=		[X86] Control Transactional Synchronization
			Extensions (TSX) feature in Intel processors that
			support TSX control.

			This parameter controls the TSX feature. The options are:

			on	- Enable TSX on the system. Although there are
				mitigations for all known security vulnerabilities,
				TSX has been known to be an accelerator for
				several previous speculation-related CVEs, and
				so there may be unknown security risks associated
				with leaving it enabled.

			off	- Disable TSX on the system. (Note that this
				option takes effect only on newer CPUs which are
				not vulnerable to MDS, i.e., have
				MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
				the new IA32_TSX_CTRL MSR through a microcode
				update. This new MSR allows for the reliable
				deactivation of the TSX functionality.)

			auto	- Disable TSX if X86_BUG_TAA is present,
				  otherwise enable TSX on the system.

			Not specifying this option is equivalent to tsx=off.

			See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
			for more details.

	tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
			Abort (TAA) vulnerability.

			Similar to Micro-architectural Data Sampling (MDS),
			certain CPUs that support Transactional
			Synchronization Extensions (TSX) are vulnerable to an
			exploit against CPU internal buffers which can forward
			information to a disclosure gadget under certain
			conditions.

			In vulnerable processors, the speculatively forwarded
			data can be used in a cache side channel attack, to
			access data to which the attacker does not have direct
			access.

			This parameter controls the TAA mitigation.  The
			options are:

			full       - Enable TAA mitigation on vulnerable CPUs
				     if TSX is enabled.

			full,nosmt - Enable TAA mitigation and disable SMT on
				     vulnerable CPUs. If TSX is disabled, SMT
				     is not disabled because the CPU is not
				     vulnerable to cross-thread TAA attacks.

			off        - Unconditionally disable TAA mitigation.

			Not specifying this option is equivalent to
			tsx_async_abort=full.  On CPUs which are MDS-affected
			and deploy MDS mitigation, TAA mitigation is not
			required and doesn't provide any additional
			mitigation.

			For details see:
			Documentation/admin-guide/hw-vuln/tsx_async_abort.rst

	turbografx.map[2|3]=	[HW,JOY]
			TurboGraFX parallel port interface
			Format:
@@ -436,6 +436,10 @@ by the driver:
   encryption.
 * ``tx_tls_ooo`` - number of TX packets which were part of a TLS stream
   but did not arrive in the expected order.
 * ``tx_tls_skip_no_sync_data`` - number of TX packets which were part of
   a TLS stream and arrived out-of-order, but skipped the HW offload routine
   and went to the regular transmit flow as they were retransmissions of the
   connection handshake.
 * ``tx_tls_drop_no_sync_data`` - number of TX packets which were part of
   a TLS stream dropped, because they arrived out of order and the associated
   record could not be found.
@@ -27,6 +27,7 @@ x86-specific Documentation
   mds
   microcode
   resctrl_ui
   tsx_async_abort
   usb-legacy-support
   i386/index
   x86_64/index

 Documentation/x86/tsx_async_abort.rst | 117 (new file)
@@ -0,0 +1,117 @@
.. SPDX-License-Identifier: GPL-2.0

TSX Async Abort (TAA) mitigation
================================

.. _tsx_async_abort:

Overview
--------

TSX Async Abort (TAA) is a side channel attack on internal buffers in some
Intel processors similar to Microarchitectural Data Sampling (MDS).  In this
case certain loads may speculatively pass invalid data to dependent operations
when an asynchronous abort condition is pending in a Transactional
Synchronization Extensions (TSX) transaction.  This includes loads with no
fault or assist condition. Such loads may speculatively expose stale data from
the same uarch data structures as in MDS, with the same scope of exposure, i.e.
same-thread and cross-thread. This issue affects all current processors that
support TSX.

Mitigation strategy
-------------------

a) TSX disable - one of the mitigations is to disable TSX. A new MSR
   IA32_TSX_CTRL will be available in future and current processors after a
   microcode update and can be used to disable TSX. In addition, it
   controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.

b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
   vulnerability. More details on this approach can be found in
   :ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`.

Kernel internal mitigation modes
--------------------------------

 =============    ============================================================
 off              Mitigation is disabled. Either the CPU is not affected or
                  tsx_async_abort=off is supplied on the kernel command line.

 tsx disabled     Mitigation is enabled. TSX feature is disabled by default at
                  bootup on processors that support TSX control.

 verw             Mitigation is enabled. CPU is affected and MD_CLEAR is
                  advertised in CPUID.

 ucode needed     Mitigation is enabled. CPU is affected and MD_CLEAR is not
                  advertised in CPUID. That is mainly for virtualization
                  scenarios where the host has the updated microcode but the
                  hypervisor does not expose MD_CLEAR in CPUID. It's a best
                  effort approach without guarantee.
 =============    ============================================================

If the CPU is affected and the "tsx_async_abort" kernel command line parameter is
not provided, then the kernel selects an appropriate mitigation depending on the
status of the RTM and MD_CLEAR CPUID bits.

The tables below indicate the impact of the tsx=on|off|auto cmdline options on
the state of TAA mitigation, VERW behavior and the TSX feature for various
combinations of MSR_IA32_ARCH_CAPABILITIES bits.
1. "tsx=off"

=========  =========  ============  ============  ==============  ===================  ======================
MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=off
----------------------------------  -------------------------------------------------------------------------
TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
=========  =========  ============  ============  ==============  ===================  ======================
    0          0           0         HW default         Yes           Same as MDS           Same as MDS
    0          0           1        Invalid case   Invalid case       Invalid case          Invalid case
    0          1           0         HW default         No         Need ucode update     Need ucode update
    0          1           1          Disabled          Yes           TSX disabled          TSX disabled
    1          X           1          Disabled           X             None needed           None needed
=========  =========  ============  ============  ==============  ===================  ======================

2. "tsx=on"

=========  =========  ============  ============  ==============  ===================  ======================
MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=on
----------------------------------  -------------------------------------------------------------------------
TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
=========  =========  ============  ============  ==============  ===================  ======================
    0          0           0         HW default        Yes            Same as MDS          Same as MDS
    0          0           1        Invalid case   Invalid case       Invalid case         Invalid case
    0          1           0         HW default        No          Need ucode update     Need ucode update
    0          1           1          Enabled          Yes               None              Same as MDS
    1          X           1          Enabled          X              None needed          None needed
=========  =========  ============  ============  ==============  ===================  ======================

3. "tsx=auto"

=========  =========  ============  ============  ==============  ===================  ======================
MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=auto
----------------------------------  -------------------------------------------------------------------------
TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
=========  =========  ============  ============  ==============  ===================  ======================
    0          0           0         HW default    Yes                Same as MDS           Same as MDS
    0          0           1        Invalid case  Invalid case        Invalid case          Invalid case
    0          1           0         HW default    No              Need ucode update     Need ucode update
    0          1           1          Disabled      Yes               TSX disabled          TSX disabled
    1          X           1          Enabled       X                 None needed           None needed
=========  =========  ============  ============  ==============  ===================  ======================

In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
indicates whether MSR_IA32_TSX_CTRL is supported.

There are two control bits in the IA32_TSX_CTRL MSR:

      Bit 0: When set, it disables the Restricted Transactional Memory (RTM)
             sub-feature of TSX (will force all transactions to abort on the
             XBEGIN instruction).

      Bit 1: When set, it disables the enumeration of the RTM and HLE features
             (i.e. it will make CPUID(EAX=7).EBX{bit4} and
             CPUID(EAX=7).EBX{bit11} read as 0).
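
To make these two controls concrete, here is a small editorial sketch that
decodes a raw IA32_TSX_CTRL value (for instance one read through the msr
driver) into the bits described above. The macro names are chosen here for
illustration; only the bit positions come from the text::

   /* Sketch: decode an IA32_TSX_CTRL value into its two control bits. */
   #include <stdint.h>
   #include <stdio.h>

   #define TSX_CTRL_RTM_DISABLE	(1ULL << 0)	/* Bit 0: force RTM transactions to abort */
   #define TSX_CTRL_CPUID_CLEAR	(1ULL << 1)	/* Bit 1: hide the RTM/HLE CPUID bits */

   static void decode_tsx_ctrl(uint64_t val)
   {
   	printf("RTM disabled:        %s\n",
   	       (val & TSX_CTRL_RTM_DISABLE) ? "yes" : "no");
   	printf("RTM/HLE CPUID clear: %s\n",
   	       (val & TSX_CTRL_CPUID_CLEAR) ? "yes" : "no");
   }

   int main(void)
   {
   	/* Example: both bits set, i.e. TSX fully deactivated. */
   	decode_tsx_ctrl(0x3);
   	return 0;
   }
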

 MAINTAINERS | 15
@@ -3053,6 +3053,7 @@ M:	Daniel Borkmann <daniel@iogearbox.net>
R:	Martin KaFai Lau <kafai@fb.com>
R:	Song Liu <songliubraving@fb.com>
R:	Yonghong Song <yhs@fb.com>
R:	Andrii Nakryiko <andriin@fb.com>
L:	netdev@vger.kernel.org
L:	bpf@vger.kernel.org
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
@@ -3260,7 +3261,6 @@ S:	Maintained
F:	drivers/cpufreq/bmips-cpufreq.c

BROADCOM BMIPS MIPS ARCHITECTURE
M:	Kevin Cernekee <cernekee@gmail.com>
M:	Florian Fainelli <f.fainelli@gmail.com>
L:	bcm-kernel-feedback-list@broadcom.com
L:	linux-mips@vger.kernel.org
@@ -3737,7 +3737,6 @@ F:	drivers/crypto/cavium/cpt/

CAVIUM THUNDERX2 ARM64 SOC
M:	Robert Richter <rrichter@cavium.com>
M:	Jayachandran C <jnair@caviumnetworks.com>
L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S:	Maintained
F:	arch/arm64/boot/dts/cavium/thunder2-99xx*
@@ -8299,11 +8298,14 @@ F:	drivers/hid/intel-ish-hid/

INTEL IOMMU (VT-d)
M:	David Woodhouse <dwmw2@infradead.org>
M:	Lu Baolu <baolu.lu@linux.intel.com>
L:	iommu@lists.linux-foundation.org
T:	git git://git.infradead.org/iommu-2.6.git
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
S:	Supported
F:	drivers/iommu/intel-iommu.c
F:	drivers/iommu/dmar.c
F:	drivers/iommu/intel*.[ch]
F:	include/linux/intel-iommu.h
F:	include/linux/intel-svm.h

INTEL IOP-ADMA DMA DRIVER
R:	Dan Williams <dan.j.williams@intel.com>
@@ -10519,8 +10521,12 @@ F:	mm/memblock.c
F:	Documentation/core-api/boot-time-mm.rst

MEMORY MANAGEMENT
M:	Andrew Morton <akpm@linux-foundation.org>
L:	linux-mm@kvack.org
W:	http://www.linux-mm.org
T:	quilt https://ozlabs.org/~akpm/mmotm/
T:	quilt https://ozlabs.org/~akpm/mmots/
T:	git git://github.com/hnaz/linux-mm.git
S:	Maintained
F:	include/linux/mm.h
F:	include/linux/gfp.h
@@ -18034,6 +18040,7 @@ F:	Documentation/vm/zsmalloc.rst
ZSWAP COMPRESSED SWAP CACHING
M:	Seth Jennings <sjenning@redhat.com>
M:	Dan Streetman <ddstreet@ieee.org>
M:	Vitaly Wool <vitaly.wool@konsulko.com>
L:	linux-mm@kvack.org
S:	Maintained
F:	mm/zswap.c

 Makefile | 5
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 4
SUBLEVEL = 0
EXTRAVERSION = -rc6
EXTRAVERSION = -rc8
NAME = Kleptomaniac Octopus

# *DOCUMENTATION*
@@ -917,6 +917,9 @@ ifeq ($(CONFIG_RELR),y)
LDFLAGS_vmlinux	+= --pack-dyn-relocs=relr
endif

# make the checker run with the right architecture
CHECKFLAGS += --arch=$(ARCH)

# insure the checker run with the right endianness
CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian)
@@ -328,6 +328,10 @@
	pinctrl-0 = <&pinctrl_pwm3>;
};

&snvs_pwrkey {
	status = "okay";
};

&ssi2 {
	status = "okay";
};

@@ -230,6 +230,8 @@
			accelerometer@1c {
				compatible = "fsl,mma8451";
				reg = <0x1c>;
				pinctrl-names = "default";
				pinctrl-0 = <&pinctrl_mma8451_int>;
				interrupt-parent = <&gpio6>;
				interrupts = <31 IRQ_TYPE_LEVEL_LOW>;
			};
@@ -628,6 +630,12 @@
			>;
		};

		pinctrl_mma8451_int: mma8451intgrp {
			fsl,pins = <
				MX6QDL_PAD_EIM_BCLK__GPIO6_IO31		0xb0b1
			>;
		};

		pinctrl_pwm3: pwm1grp {
			fsl,pins = <
				MX6QDL_PAD_SD4_DAT1__PWM3_OUT		0x1b0b1

@@ -183,14 +183,12 @@

	ov5640: camera@3c {
		compatible = "ovti,ov5640";
		pinctrl-names = "default";
		pinctrl-0 = <&ov5640_pins>;
		reg = <0x3c>;
		clocks = <&clk_ext_camera>;
		clock-names = "xclk";
		DOVDD-supply = <&v2v8>;
		powerdown-gpios = <&stmfx_pinctrl 18 GPIO_ACTIVE_HIGH>;
		reset-gpios = <&stmfx_pinctrl 19 GPIO_ACTIVE_LOW>;
		powerdown-gpios = <&stmfx_pinctrl 18 (GPIO_ACTIVE_HIGH | GPIO_PUSH_PULL)>;
		reset-gpios = <&stmfx_pinctrl 19 (GPIO_ACTIVE_LOW | GPIO_PUSH_PULL)>;
		rotation = <180>;
		status = "okay";

@@ -223,15 +221,8 @@

			joystick_pins: joystick {
				pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4";
				drive-push-pull;
				bias-pull-down;
			};

			ov5640_pins: camera {
				pins = "agpio2", "agpio3"; /* stmfx pins 18 & 19 */
				drive-push-pull;
				output-low;
			};
		};
	};
};

@@ -932,7 +932,7 @@
			interrupt-names = "int0", "int1";
			clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>;
			clock-names = "hclk", "cclk";
			bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>;
			bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>;
			status = "disabled";
		};

@@ -945,7 +945,7 @@
			interrupt-names = "int0", "int1";
			clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>;
			clock-names = "hclk", "cclk";
			bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>;
			bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>;
			status = "disabled";
		};


@@ -192,6 +192,7 @@
	vqmmc-supply = <&reg_dldo1>;
	non-removable;
	wakeup-source;
	keep-power-in-suspend;
	status = "okay";

	brcmf: wifi@1 {
@@ -481,14 +481,18 @@ static void sunxi_mc_smp_cpu_die(unsigned int l_cpu)
static int sunxi_cpu_powerdown(unsigned int cpu, unsigned int cluster)
{
	u32 reg;
	int gating_bit = cpu;

	pr_debug("%s: cluster %u cpu %u\n", __func__, cluster, cpu);
	if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS)
		return -EINVAL;

	if (is_a83t && cpu == 0)
		gating_bit = 4;

	/* gate processor power */
	reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
	reg |= PRCM_PWROFF_GATING_REG_CORE(cpu);
	reg |= PRCM_PWROFF_GATING_REG_CORE(gating_bit);
	writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
	udelay(20);

@@ -127,7 +127,7 @@
	status = "okay";

	i2c-mux@77 {
		compatible = "nxp,pca9847";
		compatible = "nxp,pca9547";
		reg = <0x77>;
		#address-cells = <1>;
		#size-cells = <0>;

@@ -394,7 +394,7 @@
			};

			sdma2: dma-controller@302c0000 {
				compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
				compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
				reg = <0x302c0000 0x10000>;
				interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
				clocks = <&clk IMX8MM_CLK_SDMA2_ROOT>,
@@ -405,7 +405,7 @@
			};

			sdma3: dma-controller@302b0000 {
				compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
				compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
				reg = <0x302b0000 0x10000>;
				interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
				clocks = <&clk IMX8MM_CLK_SDMA3_ROOT>,
@@ -737,7 +737,7 @@
			};

			sdma1: dma-controller@30bd0000 {
				compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
				compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
				reg = <0x30bd0000 0x10000>;
				interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
				clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>,

@@ -288,7 +288,7 @@
			};

			sdma3: dma-controller@302b0000 {
				compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
				compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
				reg = <0x302b0000 0x10000>;
				interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
				clocks = <&clk IMX8MN_CLK_SDMA3_ROOT>,
@@ -299,7 +299,7 @@
			};

			sdma2: dma-controller@302c0000 {
				compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
				compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
				reg = <0x302c0000 0x10000>;
				interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
				clocks = <&clk IMX8MN_CLK_SDMA2_ROOT>,
@@ -612,7 +612,7 @@
			};

			sdma1: dma-controller@30bd0000 {
				compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
				compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
				reg = <0x30bd0000 0x10000>;
				interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
				clocks = <&clk IMX8MN_CLK_SDMA1_ROOT>,

@@ -88,7 +88,7 @@
		regulator-name = "0V9_ARM";
		regulator-min-microvolt = <900000>;
		regulator-max-microvolt = <1000000>;
		gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
		gpios = <&gpio3 16 GPIO_ACTIVE_HIGH>;
		states = <1000000 0x1
		           900000 0x0>;
		regulator-always-on;
@@ -283,23 +283,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
	set_pte(ptep, pte);
}

#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	pteval_t lhs, rhs;

	lhs = pte_val(pte_a);
	rhs = pte_val(pte_b);

	if (pte_present(pte_a))
		lhs &= ~PTE_RDONLY;

	if (pte_present(pte_b))
		rhs &= ~PTE_RDONLY;

	return (lhs == rhs);
}

/*
 * Huge pte definitions.
 */

@@ -30,13 +30,6 @@ int __arm64_get_clock_mode(struct timekeeper *tk)
}
#define __arch_get_clock_mode __arm64_get_clock_mode

static __always_inline
int __arm64_use_vsyscall(struct vdso_data *vdata)
{
	return !vdata[CS_HRES_COARSE].clock_mode;
}
#define __arch_use_vsyscall __arm64_use_vsyscall

static __always_inline
void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
{

@@ -28,13 +28,6 @@ int __mips_get_clock_mode(struct timekeeper *tk)
}
#define __arch_get_clock_mode __mips_get_clock_mode

static __always_inline
int __mips_use_vsyscall(struct vdso_data *vdata)
{
	return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE);
}
#define __arch_use_vsyscall __mips_use_vsyscall

/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
@ -38,10 +38,3 @@ config REPLICATE_KTEXT
	  Say Y here to enable replicating the kernel text across multiple
	  nodes in a NUMA cluster.  This trades memory for speed.

config REPLICATE_EXHANDLERS
	bool "Exception handler replication support"
	depends on SGI_IP27
	help
	  Say Y here to enable replicating the kernel exception handlers
	  across multiple nodes in a NUMA cluster. This trades memory for
	  speed.

@ -69,23 +69,14 @@ static void per_hub_init(cnodeid_t cnode)

	hub_rtc_init(cnode);

#ifdef CONFIG_REPLICATE_EXHANDLERS
	/*
	 * If this is not a headless node initialization,
	 * copy over the caliased exception handlers.
	 */
	if (get_compact_nodeid() == cnode) {
		extern char except_vec2_generic, except_vec3_generic;
		extern void build_tlb_refill_handler(void);

		memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80);
		memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80);
		build_tlb_refill_handler();
		memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80);
		memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100);
	if (nasid) {
		/* copy exception handlers from first node to current node */
		memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0),
		       (void *)CKSEG0, 0x200);
		__flush_cache_all();
		/* switch to node local exception handlers */
		REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
	}
#endif
}

void per_cpu_init(void)
@ -332,11 +332,7 @@ static void __init mlreset(void)
		 * thinks it is a node 0 address.
		 */
		REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1));
#ifdef CONFIG_REPLICATE_EXHANDLERS
		REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
#else
		REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0);
#endif

#ifdef LATER
		/*
@ -1141,6 +1141,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
		goto out_addrs;
	}

	/*
	 * If we have seen a tail call, we need a second pass.
	 * This is because bpf_jit_emit_common_epilogue() is called
	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
	 */
	if (cgctx.seen & SEEN_TAILCALL) {
		cgctx.idx = 0;
		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
			fp = org_fp;
			goto out_addrs;
		}
	}

	/*
	 * Pretend to build prologue, given the features we've seen.  This will
	 * update ctgtx.idx as it pretends to output instructions, then we can
@ -65,14 +65,14 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
#
CFLAGS_REMOVE_vdso-note.o = -pg
CFLAGS_REMOVE_vclock_gettime.o = -pg
CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg

$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg FORCE
	$(call if_changed,objcopy)

CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1

#This makes sure the $(obj) subdirectory exists even though vdso32/
@ -1940,6 +1940,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS

	  If unsure, say y.

choice
	prompt "TSX enable mode"
	depends on CPU_SUP_INTEL
	default X86_INTEL_TSX_MODE_OFF
	help
	  Intel's TSX (Transactional Synchronization Extensions) feature
	  allows to optimize locking protocols through lock elision which
	  can lead to a noticeable performance boost.

	  On the other hand it has been shown that TSX can be exploited
	  to form side channel attacks (e.g. TAA) and chances are there
	  will be more of those attacks discovered in the future.

	  Therefore TSX is not enabled by default (aka tsx=off). An admin
	  might override this decision by tsx=on the command line parameter.
	  Even with TSX enabled, the kernel will attempt to enable the best
	  possible TAA mitigation setting depending on the microcode available
	  for the particular machine.

	  This option allows to set the default tsx mode between tsx=on, =off
	  and =auto. See Documentation/admin-guide/kernel-parameters.txt for more
	  details.

	  Say off if not sure, auto if TSX is in use but it should be used on safe
	  platforms or on if TSX is in use and the security aspect of tsx is not
	  relevant.

config X86_INTEL_TSX_MODE_OFF
	bool "off"
	help
	  TSX is disabled if possible - equals to tsx=off command line parameter.

config X86_INTEL_TSX_MODE_ON
	bool "on"
	help
	  TSX is always enabled on TSX capable HW - equals the tsx=on command
	  line parameter.

config X86_INTEL_TSX_MODE_AUTO
	bool "auto"
	help
	  TSX is enabled on TSX capable HW that is believed to be safe against
	  side channel attacks- equals the tsx=auto command line parameter.
endchoice

config EFI
	bool "EFI runtime service support"
	depends on ACPI

@ -399,5 +399,7 @@
#define X86_BUG_MDS			X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY		X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS			X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
#define X86_BUG_TAA			X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT		X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */

#endif /* _ASM_X86_CPUFEATURES_H */
@ -312,9 +312,12 @@ struct kvm_rmap_head {
struct kvm_mmu_page {
	struct list_head link;
	struct hlist_node hash_link;
	struct list_head lpage_disallowed_link;

	bool unsync;
	u8 mmu_valid_gen;
	bool mmio_cached;
	bool lpage_disallowed; /* Can't be replaced by an equiv large page */

	/*
	 * The following two entries are used to key the shadow page in the
@ -859,6 +862,7 @@ struct kvm_arch {
	 */
	struct list_head active_mmu_pages;
	struct list_head zapped_obsolete_pages;
	struct list_head lpage_disallowed_mmu_pages;
	struct kvm_page_track_notifier_node mmu_sp_tracker;
	struct kvm_page_track_notifier_head track_notifier_head;

@ -933,6 +937,7 @@ struct kvm_arch {
	bool exception_payload_enabled;

	struct kvm_pmu_event_filter *pmu_event_filter;
	struct task_struct *nx_lpage_recovery_thread;
};

struct kvm_vm_stat {
@ -946,6 +951,7 @@ struct kvm_vm_stat {
	ulong mmu_unsync;
	ulong remote_tlb_flush;
	ulong lpages;
	ulong nx_lpage_splits;
	ulong max_mmu_page_hash_collisions;
};
@ -93,6 +93,18 @@
						  * Microarchitectural Data
						  * Sampling (MDS) vulnerabilities.
						  */
#define ARCH_CAP_PSCHANGE_MC_NO		BIT(6)	 /*
						  * The processor is not susceptible to a
						  * machine check error due to modifying the
						  * code page size along with either the
						  * physical address or cache type
						  * without TLB invalidation.
						  */
#define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */
#define ARCH_CAP_TAA_NO			BIT(8)	/*
						 * Not susceptible to
						 * TSX Async Abort (TAA) vulnerabilities.
						 */

#define MSR_IA32_FLUSH_CMD		0x0000010b
#define L1D_FLUSH			BIT(0)	/*
@ -103,6 +115,10 @@
#define MSR_IA32_BBL_CR_CTL		0x00000119
#define MSR_IA32_BBL_CR_CTL3		0x0000011e

#define MSR_IA32_TSX_CTRL		0x00000122
#define TSX_CTRL_RTM_DISABLE		BIT(0)	/* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR		BIT(1)	/* Disable TSX enumeration */

#define MSR_IA32_SYSENTER_CS		0x00000174
#define MSR_IA32_SYSENTER_ESP		0x00000175
#define MSR_IA32_SYSENTER_EIP		0x00000176
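The new capability bits above can also be inspected from userspace to see what the microcode advertises. A minimal sketch, not part of the patch, assuming the msr driver is loaded, the program runs as root, and MSR_IA32_ARCH_CAPABILITIES lives at 0x10a as defined in this header:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_IA32_ARCH_CAPABILITIES 0x10a	/* as in msr-index.h */

int main(void)
{
	uint64_t cap = 0;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);	/* needs the msr driver + root */

	if (fd < 0 || pread(fd, &cap, sizeof(cap), MSR_IA32_ARCH_CAPABILITIES) != sizeof(cap)) {
		perror("MSR_IA32_ARCH_CAPABILITIES");
		return 1;
	}
	/* Bits 6, 7 and 8 correspond to PSCHANGE_MC_NO, TSX_CTRL_MSR and TAA_NO above. */
	printf("PSCHANGE_MC_NO=%d TSX_CTRL_MSR=%d TAA_NO=%d\n",
	       !!(cap & (1ULL << 6)), !!(cap & (1ULL << 7)), !!(cap & (1ULL << 8)));
	close(fd);
	return 0;
}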
@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
#include <asm/segment.h>

/**
 * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
 * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
 *
 * This uses the otherwise unused and obsolete VERW instruction in
 * combination with microcode which triggers a CPU buffer flush when the
@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers(void)
}

/**
 * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
 * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
 *
 * Clear CPU buffers if the corresponding static key is enabled
 */

@ -988,4 +988,11 @@ enum mds_mitigations {
	MDS_MITIGATION_VMWERV,
};

enum taa_mitigations {
	TAA_MITIGATION_OFF,
	TAA_MITIGATION_UCODE_NEEDED,
	TAA_MITIGATION_VERW,
	TAA_MITIGATION_TSX_DISABLED,
};

#endif /* _ASM_X86_PROCESSOR_H */
@ -1586,9 +1586,6 @@ static void setup_local_APIC(void)
{
	int cpu = smp_processor_id();
	unsigned int value;
#ifdef CONFIG_X86_32
	int logical_apicid, ldr_apicid;
#endif

	if (disable_apic) {
		disable_ioapic_support();
@ -1626,16 +1623,21 @@ static void setup_local_APIC(void)
	apic->init_apic_ldr();

#ifdef CONFIG_X86_32
	/*
	 * APIC LDR is initialized.  If logical_apicid mapping was
	 * initialized during get_smp_config(), make sure it matches the
	 * actual value.
	 */
	logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
	ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
	WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid);
	/* always use the value from LDR */
	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
	if (apic->dest_logical) {
		int logical_apicid, ldr_apicid;

		/*
		 * APIC LDR is initialized.  If logical_apicid mapping was
		 * initialized during get_smp_config(), make sure it matches
		 * the actual value.
		 */
		logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
		ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
		if (logical_apicid != BAD_APICID)
			WARN_ON(logical_apicid != ldr_apicid);
		/* Always use the value from LDR. */
		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
	}
#endif

	/*
@ -30,7 +30,7 @@ obj-$(CONFIG_PROC_FS)	+= proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o

ifdef CONFIG_CPU_SUP_INTEL
obj-y			+= intel.o intel_pconfig.o
obj-y			+= intel.o intel_pconfig.o tsx.o
obj-$(CONFIG_PM)	+= intel_epb.o
endif
obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
static void __init taa_select_mitigation(void);

/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@ -105,6 +106,7 @@ void __init check_bugs(void)
	ssb_select_mitigation();
	l1tf_select_mitigation();
	mds_select_mitigation();
	taa_select_mitigation();

	arch_smt_update();
@ -268,6 +270,100 @@ static int __init mds_cmdline(char *str)
}
early_param("mds", mds_cmdline);

#undef pr_fmt
#define pr_fmt(fmt)	"TAA: " fmt

/* Default mitigation for TAA-affected CPUs */
static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
static bool taa_nosmt __ro_after_init;

static const char * const taa_strings[] = {
	[TAA_MITIGATION_OFF]		= "Vulnerable",
	[TAA_MITIGATION_UCODE_NEEDED]	= "Vulnerable: Clear CPU buffers attempted, no microcode",
	[TAA_MITIGATION_VERW]		= "Mitigation: Clear CPU buffers",
	[TAA_MITIGATION_TSX_DISABLED]	= "Mitigation: TSX disabled",
};

static void __init taa_select_mitigation(void)
{
	u64 ia32_cap;

	if (!boot_cpu_has_bug(X86_BUG_TAA)) {
		taa_mitigation = TAA_MITIGATION_OFF;
		return;
	}

	/* TSX previously disabled by tsx=off */
	if (!boot_cpu_has(X86_FEATURE_RTM)) {
		taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
		goto out;
	}

	if (cpu_mitigations_off()) {
		taa_mitigation = TAA_MITIGATION_OFF;
		return;
	}

	/* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
	if (taa_mitigation == TAA_MITIGATION_OFF)
		goto out;

	if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
		taa_mitigation = TAA_MITIGATION_VERW;
	else
		taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;

	/*
	 * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
	 * A microcode update fixes this behavior to clear CPU buffers. It also
	 * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
	 * ARCH_CAP_TSX_CTRL_MSR bit.
	 *
	 * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
	 * update is required.
	 */
	ia32_cap = x86_read_arch_cap_msr();
	if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
	    !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
		taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;

	/*
	 * TSX is enabled, select alternate mitigation for TAA which is
	 * the same as MDS. Enable MDS static branch to clear CPU buffers.
	 *
	 * For guests that can't determine whether the correct microcode is
	 * present on host, enable the mitigation for UCODE_NEEDED as well.
	 */
	static_branch_enable(&mds_user_clear);

	if (taa_nosmt || cpu_mitigations_auto_nosmt())
		cpu_smt_disable(false);

out:
	pr_info("%s\n", taa_strings[taa_mitigation]);
}

static int __init tsx_async_abort_parse_cmdline(char *str)
{
	if (!boot_cpu_has_bug(X86_BUG_TAA))
		return 0;

	if (!str)
		return -EINVAL;

	if (!strcmp(str, "off")) {
		taa_mitigation = TAA_MITIGATION_OFF;
	} else if (!strcmp(str, "full")) {
		taa_mitigation = TAA_MITIGATION_VERW;
	} else if (!strcmp(str, "full,nosmt")) {
		taa_mitigation = TAA_MITIGATION_VERW;
		taa_nosmt = true;
	}

	return 0;
}
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);

#undef pr_fmt
#define pr_fmt(fmt)     "Spectre V1 : " fmt
@ -786,13 +882,10 @@ static void update_mds_branch_idle(void)
}

#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"

void cpu_bugs_smt_update(void)
{
	/* Enhanced IBRS implies STIBP. No update required. */
	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
		return;

	mutex_lock(&spec_ctrl_mutex);

	switch (spectre_v2_user) {
@ -819,6 +912,17 @@ void cpu_bugs_smt_update(void)
		break;
	}

	switch (taa_mitigation) {
	case TAA_MITIGATION_VERW:
	case TAA_MITIGATION_UCODE_NEEDED:
		if (sched_smt_active())
			pr_warn_once(TAA_MSG_SMT);
		break;
	case TAA_MITIGATION_TSX_DISABLED:
	case TAA_MITIGATION_OFF:
		break;
	}

	mutex_unlock(&spec_ctrl_mutex);
}
@ -1149,6 +1253,9 @@ void x86_spec_ctrl_setup_ap(void)
		x86_amd_ssb_disable();
}

bool itlb_multihit_kvm_mitigation;
EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);

#undef pr_fmt
#define pr_fmt(fmt)	"L1TF: " fmt

@ -1304,11 +1411,24 @@ static ssize_t l1tf_show_state(char *buf)
		       l1tf_vmx_states[l1tf_vmx_mitigation],
		       sched_smt_active() ? "vulnerable" : "disabled");
}

static ssize_t itlb_multihit_show_state(char *buf)
{
	if (itlb_multihit_kvm_mitigation)
		return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
	else
		return sprintf(buf, "KVM: Vulnerable\n");
}
#else
static ssize_t l1tf_show_state(char *buf)
{
	return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
}

static ssize_t itlb_multihit_show_state(char *buf)
{
	return sprintf(buf, "Processor vulnerable\n");
}
#endif

static ssize_t mds_show_state(char *buf)
@ -1328,6 +1448,21 @@ static ssize_t mds_show_state(char *buf)
		       sched_smt_active() ? "vulnerable" : "disabled");
}

static ssize_t tsx_async_abort_show_state(char *buf)
{
	if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
	    (taa_mitigation == TAA_MITIGATION_OFF))
		return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		return sprintf(buf, "%s; SMT Host state unknown\n",
			       taa_strings[taa_mitigation]);
	}

	return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
		       sched_smt_active() ? "vulnerable" : "disabled");
}

static char *stibp_state(void)
{
	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
@ -1398,6 +1533,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
	case X86_BUG_MDS:
		return mds_show_state(buf);

	case X86_BUG_TAA:
		return tsx_async_abort_show_state(buf);

	case X86_BUG_ITLB_MULTIHIT:
		return itlb_multihit_show_state(buf);

	default:
		break;
	}
@ -1434,4 +1575,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu
{
	return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
}

ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
}

ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
}
#endif
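The cpu_show_tsx_async_abort() and cpu_show_itlb_multihit() handlers above back new per-vulnerability entries under /sys/devices/system/cpu/vulnerabilities/. A minimal userspace sketch for reading them, not part of the patch, assuming the attribute file names match the handler suffixes:

#include <stdio.h>

/* Dump one vulnerabilities file, e.g. "tsx_async_abort" or "itlb_multihit". */
static void show_vuln(const char *name)
{
	char path[256], line[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/vulnerabilities/%s", name);
	f = fopen(path, "r");
	if (!f) {
		printf("%s: not reported by this kernel\n", name);
		return;
	}
	if (fgets(line, sizeof(line), f))
		printf("%s: %s", name, line);
	fclose(f);
}

int main(void)
{
	show_vuln("tsx_async_abort");
	show_vuln("itlb_multihit");
	return 0;
}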
@ -1016,13 +1016,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}

#define NO_SPECULATION	BIT(0)
#define NO_MELTDOWN	BIT(1)
#define NO_SSB		BIT(2)
#define NO_L1TF		BIT(3)
#define NO_MDS		BIT(4)
#define MSBDS_ONLY	BIT(5)
#define NO_SWAPGS	BIT(6)
#define NO_SPECULATION		BIT(0)
#define NO_MELTDOWN		BIT(1)
#define NO_SSB			BIT(2)
#define NO_L1TF			BIT(3)
#define NO_MDS			BIT(4)
#define MSBDS_ONLY		BIT(5)
#define NO_SWAPGS		BIT(6)
#define NO_ITLB_MULTIHIT	BIT(7)

#define VULNWL(_vendor, _family, _model, _whitelist)	\
	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@ -1043,27 +1044,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
	VULNWL(NSC,	5, X86_MODEL_ANY,	NO_SPECULATION),

	/* Intel Family 6 */
	VULNWL_INTEL(ATOM_SALTWELL,		NO_SPECULATION),
	VULNWL_INTEL(ATOM_SALTWELL_TABLET,	NO_SPECULATION),
	VULNWL_INTEL(ATOM_SALTWELL_MID,		NO_SPECULATION),
	VULNWL_INTEL(ATOM_BONNELL,		NO_SPECULATION),
	VULNWL_INTEL(ATOM_BONNELL_MID,		NO_SPECULATION),
	VULNWL_INTEL(ATOM_SALTWELL,		NO_SPECULATION | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_SALTWELL_TABLET,	NO_SPECULATION | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_SALTWELL_MID,		NO_SPECULATION | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_BONNELL,		NO_SPECULATION | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_BONNELL_MID,		NO_SPECULATION | NO_ITLB_MULTIHIT),

	VULNWL_INTEL(ATOM_SILVERMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(ATOM_SILVERMONT_D,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(ATOM_SILVERMONT_MID,	NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(ATOM_AIRMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(XEON_PHI_KNL,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(XEON_PHI_KNM,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(ATOM_SILVERMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_SILVERMONT_D,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_SILVERMONT_MID,	NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_AIRMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(XEON_PHI_KNL,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(XEON_PHI_KNM,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),

	VULNWL_INTEL(CORE_YONAH,		NO_SSB),

	VULNWL_INTEL(ATOM_AIRMONT_MID,		NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
	VULNWL_INTEL(ATOM_AIRMONT_NP,		NO_L1TF | NO_SWAPGS),
	VULNWL_INTEL(ATOM_AIRMONT_MID,		NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_AIRMONT_NP,		NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),

	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS),
	VULNWL_INTEL(ATOM_GOLDMONT_D,		NO_MDS | NO_L1TF | NO_SWAPGS),
	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS),
	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_GOLDMONT_D,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),

	/*
	 * Technically, swapgs isn't serializing on AMD (despite it previously
@ -1073,15 +1074,17 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
	 * good enough for our purposes.
	 */

	VULNWL_INTEL(ATOM_TREMONT_D,		NO_ITLB_MULTIHIT),

	/* AMD Family 0xf - 0x12 */
	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),

	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_HYGON(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
	VULNWL_HYGON(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
	{}
};
@ -1092,19 +1095,30 @@ static bool __init cpu_matches(unsigned long which)
	return m && !!(m->driver_data & which);
}

static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
u64 x86_read_arch_cap_msr(void)
{
	u64 ia32_cap = 0;

	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

	return ia32_cap;
}

static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
	u64 ia32_cap = x86_read_arch_cap_msr();

	/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
	if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
		setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);

	if (cpu_matches(NO_SPECULATION))
		return;

	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);

	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

	if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
	   !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@ -1121,6 +1135,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
	if (!cpu_matches(NO_SWAPGS))
		setup_force_cpu_bug(X86_BUG_SWAPGS);

	/*
	 * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
	 *	- TSX is supported or
	 *	- TSX_CTRL is present
	 *
	 * TSX_CTRL check is needed for cases when TSX could be disabled before
	 * the kernel boot e.g. kexec.
	 * TSX_CTRL check alone is not sufficient for cases when the microcode
	 * update is not present or running as guest that don't get TSX_CTRL.
	 */
	if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
	    (cpu_has(c, X86_FEATURE_RTM) ||
	     (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
		setup_force_cpu_bug(X86_BUG_TAA);

	if (cpu_matches(NO_MELTDOWN))
		return;

@ -1554,6 +1583,8 @@ void __init identify_boot_cpu(void)
#endif
	cpu_detect_tlb(&boot_cpu_data);
	setup_cr_pinning();

	tsx_init();
}

void identify_secondary_cpu(struct cpuinfo_x86 *c)
@ -44,6 +44,22 @@ struct _tlb_table {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
			    *const __x86_cpu_dev_end[];

#ifdef CONFIG_CPU_SUP_INTEL
enum tsx_ctrl_states {
	TSX_CTRL_ENABLE,
	TSX_CTRL_DISABLE,
	TSX_CTRL_NOT_SUPPORTED,
};

extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;

extern void __init tsx_init(void);
extern void tsx_enable(void);
extern void tsx_disable(void);
#else
static inline void tsx_init(void) { }
#endif /* CONFIG_CPU_SUP_INTEL */

extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
@ -62,4 +78,6 @@ unsigned int aperfmperf_get_khz(int cpu);

extern void x86_spec_ctrl_setup_ap(void);

extern u64 x86_read_arch_cap_msr(void);

#endif /* ARCH_X86_CPU_H */
@ -762,6 +762,11 @@ static void init_intel(struct cpuinfo_x86 *c)
		detect_tme(c);

	init_intel_misc_features(c);

	if (tsx_ctrl_state == TSX_CTRL_ENABLE)
		tsx_enable();
	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
		tsx_disable();
}

#ifdef CONFIG_X86_32
@ -522,6 +522,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
	int ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		ret = -ENOENT;
		goto out;
	}

	md.priv = of->kn->priv;
	resid = md.u.rid;

@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
	}

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	rdt_last_cmd_clear();
	if (!rdtgrp) {
		ret = -ENOENT;
		rdt_last_cmd_puts("Directory was removed\n");
		goto unlock;
	}

@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
	int ret;

	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
	rdt_last_cmd_clear();
	if (!prdtgrp) {
		ret = -ENODEV;
		rdt_last_cmd_puts("Directory was removed\n");
		goto out_unlock;
	}
140	arch/x86/kernel/cpu/tsx.c	Normal file
@ -0,0 +1,140 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Intel Transactional Synchronization Extensions (TSX) control.
 *
 * Copyright (C) 2019 Intel Corporation
 *
 * Author:
 *	Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
 */

#include <linux/cpufeature.h>

#include <asm/cmdline.h>

#include "cpu.h"

enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;

void tsx_disable(void)
{
	u64 tsx;

	rdmsrl(MSR_IA32_TSX_CTRL, tsx);

	/* Force all transactions to immediately abort */
	tsx |= TSX_CTRL_RTM_DISABLE;

	/*
	 * Ensure TSX support is not enumerated in CPUID.
	 * This is visible to userspace and will ensure they
	 * do not waste resources trying TSX transactions that
	 * will always abort.
	 */
	tsx |= TSX_CTRL_CPUID_CLEAR;

	wrmsrl(MSR_IA32_TSX_CTRL, tsx);
}

void tsx_enable(void)
{
	u64 tsx;

	rdmsrl(MSR_IA32_TSX_CTRL, tsx);

	/* Enable the RTM feature in the cpu */
	tsx &= ~TSX_CTRL_RTM_DISABLE;

	/*
	 * Ensure TSX support is enumerated in CPUID.
	 * This is visible to userspace and will ensure they
	 * can enumerate and use the TSX feature.
	 */
	tsx &= ~TSX_CTRL_CPUID_CLEAR;

	wrmsrl(MSR_IA32_TSX_CTRL, tsx);
}

static bool __init tsx_ctrl_is_supported(void)
{
	u64 ia32_cap = x86_read_arch_cap_msr();

	/*
	 * TSX is controlled via MSR_IA32_TSX_CTRL.  However, support for this
	 * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
	 *
	 * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
	 * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
	 * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
	 * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
	 * tsx= cmdline requests will do nothing on CPUs without
	 * MSR_IA32_TSX_CTRL support.
	 */
	return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
}

static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
{
	if (boot_cpu_has_bug(X86_BUG_TAA))
		return TSX_CTRL_DISABLE;

	return TSX_CTRL_ENABLE;
}

void __init tsx_init(void)
{
	char arg[5] = {};
	int ret;

	if (!tsx_ctrl_is_supported())
		return;

	ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
	if (ret >= 0) {
		if (!strcmp(arg, "on")) {
			tsx_ctrl_state = TSX_CTRL_ENABLE;
		} else if (!strcmp(arg, "off")) {
			tsx_ctrl_state = TSX_CTRL_DISABLE;
		} else if (!strcmp(arg, "auto")) {
			tsx_ctrl_state = x86_get_tsx_auto_mode();
		} else {
			tsx_ctrl_state = TSX_CTRL_DISABLE;
			pr_err("tsx: invalid option, defaulting to off\n");
		}
	} else {
		/* tsx= not provided */
		if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
			tsx_ctrl_state = x86_get_tsx_auto_mode();
		else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
			tsx_ctrl_state = TSX_CTRL_DISABLE;
		else
			tsx_ctrl_state = TSX_CTRL_ENABLE;
	}

	if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
		tsx_disable();

		/*
		 * tsx_disable() will change the state of the
		 * RTM CPUID bit.  Clear it here since it is now
		 * expected to be not set.
		 */
		setup_clear_cpu_cap(X86_FEATURE_RTM);
	} else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {

		/*
		 * HW defaults TSX to be enabled at bootup.
		 * We may still need the TSX enable support
		 * during init for special cases like
		 * kexec after TSX is disabled.
		 */
		tsx_enable();

		/*
		 * tsx_enable() will change the state of the
		 * RTM CPUID bit.  Force it here since it is now
		 * expected to be set.
		 */
		setup_force_cpu_cap(X86_FEATURE_RTM);
	}
}
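One way to observe the effect of tsx_init() is to check whether RTM is still enumerated to userspace after booting with tsx=off. A hedged sketch using GCC's <cpuid.h>, not part of the patch; the assumption here is that CPUID leaf 7, sub-leaf 0, EBX bit 11 reports RTM:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	/* Leaf 7 / sub-leaf 0, EBX bit 11 is assumed to enumerate RTM. */
	printf("RTM enumerated: %s\n", (ebx & (1u << 11)) ? "yes" : "no");
	return 0;
}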
@ -94,6 +94,13 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
	BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);

	begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
	/*
	 * Handle the case where stack trace is collected _before_
	 * cea_exception_stacks had been initialized.
	 */
	if (!begin)
		return false;

	end = begin + sizeof(struct cea_exception_stacks);
	/* Bail if @stack is outside the exception stack area. */
	if (stk < begin || stk >= end)
@ -710,6 +710,8 @@ static struct chipset early_qrk[] __initdata = {
	 */
	{ PCI_VENDOR_ID_INTEL, 0x0f00,
		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
	{ PCI_VENDOR_ID_INTEL, 0x3ec4,
		PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
	{ PCI_VENDOR_ID_BROADCOM, 0x4331,
	  PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
	{}

@ -1505,6 +1505,9 @@ void __init tsc_init(void)
		return;
	}

	if (tsc_clocksource_reliable || no_tsc_watchdog)
		clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;

	clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
	detect_art();
}
@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/kern_levels.h>
#include <linux/kthread.h>

#include <asm/page.h>
#include <asm/pat.h>
@ -47,6 +48,35 @@
#include <asm/kvm_page_track.h>
#include "trace.h"

extern bool itlb_multihit_kvm_mitigation;

static int __read_mostly nx_huge_pages = -1;
#ifdef CONFIG_PREEMPT_RT
/* Recovery can cause latency spikes, disable it for PREEMPT_RT.  */
static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
#else
static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
#endif

static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);

static struct kernel_param_ops nx_huge_pages_ops = {
	.set = set_nx_huge_pages,
	.get = param_get_bool,
};

static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
	.set = set_nx_huge_pages_recovery_ratio,
	.get = param_get_uint,
};

module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
__MODULE_PARM_TYPE(nx_huge_pages, "bool");
module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
		&nx_huge_pages_recovery_ratio, 0644);
__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
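The module_param_cb() hooks above make both knobs writable at runtime. A minimal sketch that updates them from userspace, not part of the patch, assuming the conventional /sys/module/kvm/parameters/ paths and root privileges; "off", "force" and "auto" are interpreted by the set_nx_huge_pages() callback declared above:

#include <stdio.h>

/* Illustrative only: write a value to a kvm module parameter. */
static int set_kvm_param(const char *param, const char *value)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/module/kvm/parameters/%s", param);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(value, f);
	fclose(f);
	return 0;
}

int main(void)
{
	if (set_kvm_param("nx_huge_pages", "auto"))
		perror("nx_huge_pages");
	if (set_kvm_param("nx_huge_pages_recovery_ratio", "60"))
		perror("nx_huge_pages_recovery_ratio");
	return 0;
}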
/*
 * When setting this variable to true it enables Two-Dimensional-Paging
 * where the hardware walks 2 page tables:
@ -352,6 +382,11 @@ static inline bool spte_ad_need_write_protect(u64 spte)
	return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
}

static bool is_nx_huge_page_enabled(void)
{
	return READ_ONCE(nx_huge_pages);
}

static inline u64 spte_shadow_accessed_mask(u64 spte)
{
	MMU_WARN_ON(is_mmio_spte(spte));
@ -1190,6 +1225,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
	kvm_mmu_gfn_disallow_lpage(slot, gfn);
}

static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	if (sp->lpage_disallowed)
		return;

	++kvm->stat.nx_lpage_splits;
	list_add_tail(&sp->lpage_disallowed_link,
		      &kvm->arch.lpage_disallowed_mmu_pages);
	sp->lpage_disallowed = true;
}

static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	struct kvm_memslots *slots;
@ -1207,6 +1253,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
	kvm_mmu_gfn_allow_lpage(slot, gfn);
}

static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	--kvm->stat.nx_lpage_splits;
	sp->lpage_disallowed = false;
	list_del(&sp->lpage_disallowed_link);
}

static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
					  struct kvm_memory_slot *slot)
{
@ -2792,6 +2845,9 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
			kvm_reload_remote_mmus(kvm);
	}

	if (sp->lpage_disallowed)
		unaccount_huge_nx_page(kvm, sp);

	sp->role.invalid = 1;
	return list_unstable;
}
@ -3013,6 +3069,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
	if (!speculative)
		spte |= spte_shadow_accessed_mask(spte);

	if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
	    is_nx_huge_page_enabled()) {
		pte_access &= ~ACC_EXEC_MASK;
	}

	if (pte_access & ACC_EXEC_MASK)
		spte |= shadow_x_mask;
	else
@ -3233,9 +3294,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
	__direct_pte_prefetch(vcpu, sp, sptep);
}

static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
				       gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
{
	int level = *levelp;
	u64 spte = *it.sptep;

	if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
	    is_nx_huge_page_enabled() &&
	    is_shadow_present_pte(spte) &&
	    !is_large_pte(spte)) {
		/*
		 * A small SPTE exists for this pfn, but FNAME(fetch)
		 * and __direct_map would like to create a large PTE
		 * instead: just force them to go down another level,
		 * patching back for them into pfn the next 9 bits of
		 * the address.
		 */
		u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
		*pfnp |= gfn & page_mask;
		(*levelp)--;
	}
}
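To make the page_mask arithmetic in disallowed_hugepage_adjust() concrete, here is a stand-alone sketch, not part of the patch, assuming the usual x86 definition KVM_PAGES_PER_HPAGE(level) = 1 << ((level - 1) * 9); for a 2MB mapping (level 2) the mask is 0x1ff, i.e. the next 9 bits of the guest frame number are folded back into the pfn before retrying one level down:

#include <stdio.h>
#include <stdint.h>

/* Assumed definition, mirroring the usual x86 KVM macro. */
#define PAGES_PER_HPAGE(level) (1ULL << (((level) - 1) * 9))

int main(void)
{
	int level = 2;				/* 2MB mapping */
	uint64_t gfn = 0x12345;			/* example guest frame number */
	uint64_t pfn = gfn & ~(PAGES_PER_HPAGE(level) - 1);

	/* Same expression as in disallowed_hugepage_adjust(): the low 9 bits. */
	uint64_t page_mask = PAGES_PER_HPAGE(level) - PAGES_PER_HPAGE(level - 1);

	pfn |= gfn & page_mask;			/* patch the next 9 bits back in */
	level--;				/* retry one level down (4KB) */

	printf("page_mask=0x%llx pfn=0x%llx level=%d\n",
	       (unsigned long long)page_mask, (unsigned long long)pfn, level);
	return 0;
}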
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
			int map_writable, int level, kvm_pfn_t pfn,
			bool prefault)
			bool prefault, bool lpage_disallowed)
{
	struct kvm_shadow_walk_iterator it;
	struct kvm_mmu_page *sp;
@ -3248,6 +3332,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,

	trace_kvm_mmu_spte_requested(gpa, level, pfn);
	for_each_shadow_entry(vcpu, gpa, it) {
		/*
		 * We cannot overwrite existing page tables with an NX
		 * large page, as the leaf could be executable.
		 */
		disallowed_hugepage_adjust(it, gfn, &pfn, &level);

		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
		if (it.level == level)
			break;
@ -3258,6 +3348,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
					      it.level - 1, true, ACC_ALL);

			link_shadow_page(vcpu, it.sptep, sp);
			if (lpage_disallowed)
				account_huge_nx_page(vcpu->kvm, sp);
		}
	}

@ -3306,7 +3398,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
	 * here.
	 */
	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
	    level == PT_PAGE_TABLE_LEVEL &&
	    !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
	    PageTransCompoundMap(pfn_to_page(pfn)) &&
	    !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
		unsigned long mask;
@ -3550,11 +3642,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
{
	int r;
	int level;
	bool force_pt_level = false;
	bool force_pt_level;
	kvm_pfn_t pfn;
	unsigned long mmu_seq;
	bool map_writable, write = error_code & PFERR_WRITE_MASK;
	bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
				is_nx_huge_page_enabled();

	force_pt_level = lpage_disallowed;
	level = mapping_level(vcpu, gfn, &force_pt_level);
	if (likely(!force_pt_level)) {
		/*
@ -3588,7 +3683,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
		goto out_unlock;
	if (likely(!force_pt_level))
		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
	r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
	r = __direct_map(vcpu, v, write, map_writable, level, pfn,
			 prefault, false);
out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
@ -4174,6 +4270,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
	unsigned long mmu_seq;
	int write = error_code & PFERR_WRITE_MASK;
	bool map_writable;
	bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
				is_nx_huge_page_enabled();

	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));

@ -4184,8 +4282,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
	if (r)
		return r;

	force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
							   PT_DIRECTORY_LEVEL);
	force_pt_level =
		lpage_disallowed ||
		!check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
	level = mapping_level(vcpu, gfn, &force_pt_level);
	if (likely(!force_pt_level)) {
		if (level > PT_DIRECTORY_LEVEL &&
@ -4214,7 +4313,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
		goto out_unlock;
	if (likely(!force_pt_level))
		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
	r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
	r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
			 prefault, lpage_disallowed);
out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
@ -5914,9 +6014,9 @@ restart:
 | 
			
		||||
		 * the guest, and the guest page table is using 4K page size
 | 
			
		||||
		 * mapping if the indirect sp has level = 1.
 | 
			
		||||
		 */
 | 
			
		||||
		if (sp->role.direct &&
 | 
			
		||||
			!kvm_is_reserved_pfn(pfn) &&
 | 
			
		||||
			PageTransCompoundMap(pfn_to_page(pfn))) {
 | 
			
		||||
		if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
 | 
			
		||||
		    !kvm_is_zone_device_pfn(pfn) &&
 | 
			
		||||
		    PageTransCompoundMap(pfn_to_page(pfn))) {
 | 
			
		||||
			pte_list_remove(rmap_head, sptep);
 | 
			
		||||
 | 
			
		||||
			if (kvm_available_flush_tlb_with_range())
 | 
			
		||||
@@ -6155,10 +6255,59 @@ static void kvm_set_mmio_spte_mask(void)
	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
}

static bool get_nx_auto_mode(void)
{
	/* Return true when CPU has the bug, and mitigations are ON */
	return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
}

static void __set_nx_huge_pages(bool val)
{
	nx_huge_pages = itlb_multihit_kvm_mitigation = val;
}

static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
{
	bool old_val = nx_huge_pages;
	bool new_val;

	/* In "auto" mode deploy workaround only if CPU has the bug. */
	if (sysfs_streq(val, "off"))
		new_val = 0;
	else if (sysfs_streq(val, "force"))
		new_val = 1;
	else if (sysfs_streq(val, "auto"))
		new_val = get_nx_auto_mode();
	else if (strtobool(val, &new_val) < 0)
		return -EINVAL;

	__set_nx_huge_pages(new_val);

	if (new_val != old_val) {
		struct kvm *kvm;

		mutex_lock(&kvm_lock);

		list_for_each_entry(kvm, &vm_list, vm_list) {
			mutex_lock(&kvm->slots_lock);
			kvm_mmu_zap_all_fast(kvm);
			mutex_unlock(&kvm->slots_lock);

			wake_up_process(kvm->arch.nx_lpage_recovery_thread);
		}
		mutex_unlock(&kvm_lock);
	}

	return 0;
}

int kvm_mmu_module_init(void)
{
	int ret = -ENOMEM;

	if (nx_huge_pages == -1)
		__set_nx_huge_pages(get_nx_auto_mode());

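set_nx_huge_pages() accepts the usual boolean spellings plus the three keywords off, force and auto, where auto resolves against the CPU bug and the global mitigations switch. A userspace sketch of the same mapping, under the assumption that the bug flag and the mitigations switch are plain booleans:

/*
 * Userspace sketch, not kernel code: "off" -> 0, "force" -> 1, and
 * "auto" -> enabled only when the CPU has the bug and mitigations are
 * not globally disabled.  The stub globals are assumptions.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool cpu_has_itlb_multihit = true;   /* assumed for the example */
static bool mitigations_off;                /* "mitigations=off" equivalent */

static int parse_nx_huge_pages(const char *val, bool *out)
{
	if (!strcmp(val, "off"))
		*out = false;
	else if (!strcmp(val, "force"))
		*out = true;
	else if (!strcmp(val, "auto"))
		*out = cpu_has_itlb_multihit && !mitigations_off;
	else if (!strcmp(val, "1") || !strcmp(val, "y"))
		*out = true;
	else if (!strcmp(val, "0") || !strcmp(val, "n"))
		*out = false;
	else
		return -1;                  /* -EINVAL in the kernel version */
	return 0;
}

int main(void)
{
	const char *inputs[] = { "off", "force", "auto", "bogus" };

	for (int i = 0; i < 4; i++) {
		bool v = false;
		int rc = parse_nx_huge_pages(inputs[i], &v);

		printf("%-5s -> rc=%d val=%d\n", inputs[i], rc, v);
	}
	return 0;
}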
	/*
 | 
			
		||||
	 * MMU roles use union aliasing which is, generally speaking, an
 | 
			
		||||
	 * undefined behavior. However, we supposedly know how compilers behave
 | 
			
		||||
@ -6238,3 +6387,116 @@ void kvm_mmu_module_exit(void)
 | 
			
		||||
	unregister_shrinker(&mmu_shrinker);
 | 
			
		||||
	mmu_audit_disable();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
 | 
			
		||||
{
 | 
			
		||||
	unsigned int old_val;
 | 
			
		||||
	int err;
 | 
			
		||||
 | 
			
		||||
	old_val = nx_huge_pages_recovery_ratio;
 | 
			
		||||
	err = param_set_uint(val, kp);
 | 
			
		||||
	if (err)
 | 
			
		||||
		return err;
 | 
			
		||||
 | 
			
		||||
	if (READ_ONCE(nx_huge_pages) &&
 | 
			
		||||
	    !old_val && nx_huge_pages_recovery_ratio) {
 | 
			
		||||
		struct kvm *kvm;
 | 
			
		||||
 | 
			
		||||
		mutex_lock(&kvm_lock);
 | 
			
		||||
 | 
			
		||||
		list_for_each_entry(kvm, &vm_list, vm_list)
 | 
			
		||||
			wake_up_process(kvm->arch.nx_lpage_recovery_thread);
 | 
			
		||||
 | 
			
		||||
		mutex_unlock(&kvm_lock);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void kvm_recover_nx_lpages(struct kvm *kvm)
{
	int rcu_idx;
	struct kvm_mmu_page *sp;
	unsigned int ratio;
	LIST_HEAD(invalid_list);
	ulong to_zap;

	rcu_idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
	to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
	while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
		/*
		 * We use a separate list instead of just using active_mmu_pages
		 * because the number of lpage_disallowed pages is expected to
		 * be relatively small compared to the total.
		 */
		sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
				      struct kvm_mmu_page,
				      lpage_disallowed_link);
		WARN_ON_ONCE(!sp->lpage_disallowed);
		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
		WARN_ON_ONCE(sp->lpage_disallowed);

		if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
			kvm_mmu_commit_zap_page(kvm, &invalid_list);
			if (to_zap)
				cond_resched_lock(&kvm->mmu_lock);
		}
	}

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, rcu_idx);
}

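The zap budget is ceil(nx_lpage_splits / ratio), so a non-zero ratio reclaims roughly 1/ratio of the currently split huge pages per pass and never rounds down to zero while work remains. A small arithmetic sketch (the ratio value used here is an assumption; the default is set elsewhere in the series, not in this hunk):

/*
 * Arithmetic sketch only.  DIV_ROUND_UP(splits, ratio) is ceiling
 * division, so each recovery pass handles about 1/ratio of the split
 * pages; ratio = 60 below is an assumed value for illustration.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long splits[] = { 0, 1, 59, 60, 61, 6000 };
	unsigned int ratio = 60;

	for (int i = 0; i < 6; i++)
		printf("splits=%lu -> to_zap=%lu\n", splits[i],
		       ratio ? DIV_ROUND_UP(splits[i], ratio) : 0);
	return 0;
}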
static long get_nx_lpage_recovery_timeout(u64 start_time)
{
	return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
		? start_time + 60 * HZ - get_jiffies_64()
		: MAX_SCHEDULE_TIMEOUT;
}

static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
{
	u64 start_time;
	long remaining_time;

	while (true) {
		start_time = get_jiffies_64();
		remaining_time = get_nx_lpage_recovery_timeout(start_time);

		set_current_state(TASK_INTERRUPTIBLE);
		while (!kthread_should_stop() && remaining_time > 0) {
			schedule_timeout(remaining_time);
			remaining_time = get_nx_lpage_recovery_timeout(start_time);
			set_current_state(TASK_INTERRUPTIBLE);
		}

		set_current_state(TASK_RUNNING);

		if (kthread_should_stop())
			return 0;

		kvm_recover_nx_lpages(kvm);
	}
}

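get_nx_lpage_recovery_timeout() schedules the next pass one minute after the previous one started, or sleeps indefinitely while the mitigation or the ratio is zero. The arithmetic, modelled in plain C with an assumed HZ value:

/*
 * Sketch of the wakeup arithmetic using plain integers instead of
 * jiffies.  The one-minute period comes from the hunk above; HZ and the
 * rest of the scaffolding are assumptions for the example.
 */
#include <stdio.h>

#define HZ 250L                              /* assumed tick rate */
#define MAX_SCHEDULE_TIMEOUT ((long)(~0UL >> 1))

static long recovery_timeout(long start_time, long now, int enabled)
{
	/* Disabled: sleep until explicitly woken by a parameter change. */
	if (!enabled)
		return MAX_SCHEDULE_TIMEOUT;
	/* Enabled: wake up one minute after the previous pass started. */
	return start_time + 60 * HZ - now;
}

int main(void)
{
	printf("enabled, 10s elapsed : %ld ticks left\n",
	       recovery_timeout(0, 10 * HZ, 1));
	printf("disabled             : %ld (forever)\n",
	       recovery_timeout(0, 10 * HZ, 0));
	return 0;
}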
int kvm_mmu_post_init_vm(struct kvm *kvm)
{
	int err;

	err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
					  "kvm-nx-lpage-recovery",
					  &kvm->arch.nx_lpage_recovery_thread);
	if (!err)
		kthread_unpark(kvm->arch.nx_lpage_recovery_thread);

	return err;
}

void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
{
	if (kvm->arch.nx_lpage_recovery_thread)
		kthread_stop(kvm->arch.nx_lpage_recovery_thread);
}

@ -210,4 +210,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 | 
			
		||||
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 | 
			
		||||
				    struct kvm_memory_slot *slot, u64 gfn);
 | 
			
		||||
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 | 
			
		||||
 | 
			
		||||
int kvm_mmu_post_init_vm(struct kvm *kvm);
 | 
			
		||||
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@ -614,13 +614,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 | 
			
		||||
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
			
		||||
			 struct guest_walker *gw,
 | 
			
		||||
			 int write_fault, int hlevel,
 | 
			
		||||
			 kvm_pfn_t pfn, bool map_writable, bool prefault)
 | 
			
		||||
			 kvm_pfn_t pfn, bool map_writable, bool prefault,
 | 
			
		||||
			 bool lpage_disallowed)
 | 
			
		||||
{
 | 
			
		||||
	struct kvm_mmu_page *sp = NULL;
 | 
			
		||||
	struct kvm_shadow_walk_iterator it;
 | 
			
		||||
	unsigned direct_access, access = gw->pt_access;
 | 
			
		||||
	int top_level, ret;
 | 
			
		||||
	gfn_t base_gfn;
 | 
			
		||||
	gfn_t gfn, base_gfn;
 | 
			
		||||
 | 
			
		||||
	direct_access = gw->pte_access;
 | 
			
		||||
 | 
			
		||||
@ -665,13 +666,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
			
		||||
			link_shadow_page(vcpu, it.sptep, sp);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	base_gfn = gw->gfn;
 | 
			
		||||
	/*
 | 
			
		||||
	 * FNAME(page_fault) might have clobbered the bottom bits of
 | 
			
		||||
	 * gw->gfn, restore them from the virtual address.
 | 
			
		||||
	 */
 | 
			
		||||
	gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
 | 
			
		||||
	base_gfn = gfn;
 | 
			
		||||
 | 
			
		||||
	trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
 | 
			
		||||
 | 
			
		||||
	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
 | 
			
		||||
		clear_sp_write_flooding_count(it.sptep);
 | 
			
		||||
		base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * We cannot overwrite existing page tables with an NX
 | 
			
		||||
		 * large page, as the leaf could be executable.
 | 
			
		||||
		 */
 | 
			
		||||
		disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
 | 
			
		||||
 | 
			
		||||
		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
 | 
			
		||||
		if (it.level == hlevel)
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
@ -683,6 +696,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
			
		||||
			sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
 | 
			
		||||
					      it.level - 1, true, direct_access);
 | 
			
		||||
			link_shadow_page(vcpu, it.sptep, sp);
 | 
			
		||||
			if (lpage_disallowed)
 | 
			
		||||
				account_huge_nx_page(vcpu->kvm, sp);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@ -759,9 +774,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 | 
			
		||||
	int r;
 | 
			
		||||
	kvm_pfn_t pfn;
 | 
			
		||||
	int level = PT_PAGE_TABLE_LEVEL;
 | 
			
		||||
	bool force_pt_level = false;
 | 
			
		||||
	unsigned long mmu_seq;
 | 
			
		||||
	bool map_writable, is_self_change_mapping;
 | 
			
		||||
	bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
 | 
			
		||||
				is_nx_huge_page_enabled();
 | 
			
		||||
	bool force_pt_level = lpage_disallowed;
 | 
			
		||||
 | 
			
		||||
	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 | 
			
		||||
 | 
			
		||||
@ -851,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 | 
			
		||||
	if (!force_pt_level)
 | 
			
		||||
		transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
 | 
			
		||||
	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
 | 
			
		||||
			 level, pfn, map_writable, prefault);
 | 
			
		||||
			 level, pfn, map_writable, prefault, lpage_disallowed);
 | 
			
		||||
	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 | 
			
		||||
 | 
			
		||||
out_unlock:
 | 
			
		||||
 | 
			
		||||
@ -1268,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 | 
			
		||||
	if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
 | 
			
		||||
	 * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
 | 
			
		||||
	 * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
 | 
			
		||||
	 * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
 | 
			
		||||
	 * correctly.
 | 
			
		||||
	 */
 | 
			
		||||
	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
 | 
			
		||||
		pi_clear_sn(pi_desc);
 | 
			
		||||
		goto after_clear_sn;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* The full case.  */
 | 
			
		||||
	do {
 | 
			
		||||
		old.control = new.control = pi_desc->control;
 | 
			
		||||
@ -1283,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 | 
			
		||||
	} while (cmpxchg64(&pi_desc->control, old.control,
 | 
			
		||||
			   new.control) != old.control);
 | 
			
		||||
 | 
			
		||||
after_clear_sn:
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Clear SN before reading the bitmap.  The VT-d firmware
 | 
			
		||||
	 * writes the bitmap and reads SN atomically (5.2.3 in the
 | 
			
		||||
@ -1291,7 +1305,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 | 
			
		||||
	 */
 | 
			
		||||
	smp_mb__after_atomic();
 | 
			
		||||
 | 
			
		||||
	if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
 | 
			
		||||
	if (!pi_is_pir_empty(pi_desc))
 | 
			
		||||
		pi_set_on(pi_desc);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -6137,7 +6151,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 | 
			
		||||
	if (pi_test_on(&vmx->pi_desc)) {
 | 
			
		||||
		pi_clear_on(&vmx->pi_desc);
 | 
			
		||||
		/*
 | 
			
		||||
		 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
 | 
			
		||||
		 * IOMMU can write to PID.ON, so the barrier matters even on UP.
 | 
			
		||||
		 * But on x86 this is just a compiler barrier anyway.
 | 
			
		||||
		 */
 | 
			
		||||
		smp_mb__after_atomic();
 | 
			
		||||
@ -6167,7 +6181,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 | 
			
		||||
 | 
			
		||||
static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
 | 
			
		||||
{
 | 
			
		||||
	return pi_test_on(vcpu_to_pi_desc(vcpu));
 | 
			
		||||
	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 | 
			
		||||
 | 
			
		||||
	return pi_test_on(pi_desc) ||
 | 
			
		||||
		(pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 | 
			
		||||
 | 
			
		||||
@ -355,6 +355,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
 | 
			
		||||
	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
 | 
			
		||||
{
 | 
			
		||||
	return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
 | 
			
		||||
}
 | 
			
		||||
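pi_is_pir_empty() scans the posted-interrupt request bitmap so vmx_dy_apicv_has_pending_interrupt() can report pending work even while SN is set. A minimal model of that emptiness test, assuming a 256-bit PIR stored as an array of unsigned longs:

/*
 * Minimal sketch of the PIR-empty test.  bitmap_empty() in the kernel
 * performs the same scan; sizes and the test harness are assumptions.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NR_VECTORS 256
#define BITS_PER_LONG (8 * (int)sizeof(unsigned long))
#define PIR_WORDS (NR_VECTORS / BITS_PER_LONG)

static bool pir_empty(const unsigned long *pir)
{
	for (int i = 0; i < PIR_WORDS; i++)
		if (pir[i])
			return false;
	return true;
}

int main(void)
{
	unsigned long pir[PIR_WORDS];

	memset(pir, 0, sizeof(pir));
	printf("empty: %d\n", pir_empty(pir));
	pir[200 / BITS_PER_LONG] |= 1UL << (200 % BITS_PER_LONG);
	printf("after posting vector 200: %d\n", pir_empty(pir));
	return 0;
}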
 | 
			
		||||
static inline void pi_set_sn(struct pi_desc *pi_desc)
 | 
			
		||||
{
 | 
			
		||||
	set_bit(POSTED_INTR_SN,
 | 
			
		||||
@ -373,6 +378,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc)
 | 
			
		||||
		(unsigned long *)&pi_desc->control);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void pi_clear_sn(struct pi_desc *pi_desc)
 | 
			
		||||
{
 | 
			
		||||
	clear_bit(POSTED_INTR_SN,
 | 
			
		||||
		(unsigned long *)&pi_desc->control);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline int pi_test_on(struct pi_desc *pi_desc)
 | 
			
		||||
{
 | 
			
		||||
	return test_bit(POSTED_INTR_ON,
 | 
			
		||||
 | 
			
		||||
@ -213,6 +213,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 | 
			
		||||
	{ "mmu_unsync", VM_STAT(mmu_unsync) },
 | 
			
		||||
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 | 
			
		||||
	{ "largepages", VM_STAT(lpages, .mode = 0444) },
 | 
			
		||||
	{ "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
 | 
			
		||||
	{ "max_mmu_page_hash_collisions",
 | 
			
		||||
		VM_STAT(max_mmu_page_hash_collisions) },
 | 
			
		||||
	{ NULL }
 | 
			
		||||
@ -1132,13 +1133,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
 | 
			
		||||
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 | 
			
		||||
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 | 
			
		||||
 *
 | 
			
		||||
 * This list is modified at module load time to reflect the
 | 
			
		||||
 * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features)
 | 
			
		||||
 * extract the supported MSRs from the related const lists.
 | 
			
		||||
 * msrs_to_save is selected from the msrs_to_save_all to reflect the
 | 
			
		||||
 * capabilities of the host cpu. This capabilities test skips MSRs that are
 | 
			
		||||
 * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
 | 
			
		||||
 * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
 | 
			
		||||
 * may depend on host virtualization features rather than host cpu features.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
static u32 msrs_to_save[] = {
 | 
			
		||||
static const u32 msrs_to_save_all[] = {
 | 
			
		||||
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 | 
			
		||||
	MSR_STAR,
 | 
			
		||||
#ifdef CONFIG_X86_64
 | 
			
		||||
@ -1179,9 +1182,10 @@ static u32 msrs_to_save[] = {
 | 
			
		||||
	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
 | 
			
		||||
static unsigned num_msrs_to_save;
 | 
			
		||||
 | 
			
		||||
static u32 emulated_msrs[] = {
 | 
			
		||||
static const u32 emulated_msrs_all[] = {
 | 
			
		||||
	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 | 
			
		||||
	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 | 
			
		||||
	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 | 
			
		||||
@ -1220,7 +1224,7 @@ static u32 emulated_msrs[] = {
 | 
			
		||||
	 * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
 | 
			
		||||
	 * We always support the "true" VMX control MSRs, even if the host
 | 
			
		||||
	 * processor does not, so I am putting these registers here rather
 | 
			
		||||
	 * than in msrs_to_save.
 | 
			
		||||
	 * than in msrs_to_save_all.
 | 
			
		||||
	 */
 | 
			
		||||
	MSR_IA32_VMX_BASIC,
 | 
			
		||||
	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
 | 
			
		||||
@ -1239,13 +1243,14 @@ static u32 emulated_msrs[] = {
 | 
			
		||||
	MSR_KVM_POLL_CONTROL,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
 | 
			
		||||
static unsigned num_emulated_msrs;
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * List of msr numbers which are used to expose MSR-based features that
 | 
			
		||||
 * can be used by a hypervisor to validate requested CPU features.
 | 
			
		||||
 */
 | 
			
		||||
static u32 msr_based_features[] = {
 | 
			
		||||
static const u32 msr_based_features_all[] = {
 | 
			
		||||
	MSR_IA32_VMX_BASIC,
 | 
			
		||||
	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
 | 
			
		||||
	MSR_IA32_VMX_PINBASED_CTLS,
 | 
			
		||||
@ -1270,6 +1275,7 @@ static u32 msr_based_features[] = {
 | 
			
		||||
	MSR_IA32_ARCH_CAPABILITIES,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
 | 
			
		||||
static unsigned int num_msr_based_features;
 | 
			
		||||
 | 
			
		||||
static u64 kvm_get_arch_capabilities(void)
@@ -1279,6 +1285,14 @@ static u64 kvm_get_arch_capabilities(void)
	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);

	/*
	 * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
	 * the nested hypervisor runs with NX huge pages.  If it is not,
	 * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
	 * L1 guests, so it need not worry about its own (L2) guests.
	 */
	data |= ARCH_CAP_PSCHANGE_MC_NO;

	/*
	 * If we're doing cache flushes (either "always" or "cond")
	 * we will do one whenever the guest does a vmlaunch/vmresume.
@@ -1298,6 +1312,25 @@ static u64 kvm_get_arch_capabilities(void)
	if (!boot_cpu_has_bug(X86_BUG_MDS))
		data |= ARCH_CAP_MDS_NO;

	/*
	 * On TAA affected systems, export MDS_NO=0 when:
	 *	- TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
	 *	- Updated microcode is present. This is detected by
	 *	  the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
	 *	  that VERW clears CPU buffers.
	 *
	 * When MDS_NO=0 is exported, guests deploy clear CPU buffer
	 * mitigation and don't complain:
	 *
	 *	"Vulnerable: Clear CPU buffers attempted, no microcode"
	 *
	 * If TSX is disabled on the system, guests are also mitigated against
	 * TAA and clear CPU buffer mitigation is not required for guests.
	 */
	if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
	    (data & ARCH_CAP_TSX_CTRL_MSR))
		data &= ~ARCH_CAP_MDS_NO;

	return data;
}

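The TAA handling only hides MDS_NO from the guest when the host is TAA-affected, TSX (RTM) is enabled and the TSX_CTRL MSR is present; otherwise the guest either is not exposed or is already covered by TSX being off. A decision sketch in userspace C (the bit positions are assumptions, only the logic mirrors the hunk):

/*
 * Decision-table sketch for the MDS_NO adjustment above.  Illustrative
 * only: bit positions and the harness are assumptions.
 */
#include <stdbool.h>
#include <stdio.h>

#define ARCH_CAP_MDS_NO        (1ULL << 5)
#define ARCH_CAP_TSX_CTRL_MSR  (1ULL << 7)

static unsigned long long adjust_caps(unsigned long long data,
				      bool bug_taa, bool has_rtm)
{
	/* Only hide MDS_NO when the guest could still mount a TAA attack:
	 * the host is affected, TSX is enabled, and updated microcode
	 * (TSX_CTRL present) is what makes VERW clear the buffers. */
	if (bug_taa && has_rtm && (data & ARCH_CAP_TSX_CTRL_MSR))
		data &= ~ARCH_CAP_MDS_NO;
	return data;
}

int main(void)
{
	unsigned long long caps = ARCH_CAP_MDS_NO | ARCH_CAP_TSX_CTRL_MSR;

	printf("TAA affected, TSX on : MDS_NO=%d\n",
	       !!(adjust_caps(caps, true, true) & ARCH_CAP_MDS_NO));
	printf("TAA affected, TSX off: MDS_NO=%d\n",
	       !!(adjust_caps(caps, true, false) & ARCH_CAP_MDS_NO));
	return 0;
}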
@ -5090,22 +5123,26 @@ static void kvm_init_msr_list(void)
 | 
			
		||||
{
 | 
			
		||||
	struct x86_pmu_capability x86_pmu;
 | 
			
		||||
	u32 dummy[2];
 | 
			
		||||
	unsigned i, j;
 | 
			
		||||
	unsigned i;
 | 
			
		||||
 | 
			
		||||
	BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
 | 
			
		||||
			 "Please update the fixed PMCs in msrs_to_save[]");
 | 
			
		||||
			 "Please update the fixed PMCs in msrs_to_saved_all[]");
 | 
			
		||||
 | 
			
		||||
	perf_get_x86_pmu_capability(&x86_pmu);
 | 
			
		||||
 | 
			
		||||
	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
 | 
			
		||||
		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
 | 
			
		||||
	num_msrs_to_save = 0;
 | 
			
		||||
	num_emulated_msrs = 0;
 | 
			
		||||
	num_msr_based_features = 0;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
 | 
			
		||||
		if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * Even MSRs that are valid in the host may not be exposed
 | 
			
		||||
		 * to the guests in some cases.
 | 
			
		||||
		 */
 | 
			
		||||
		switch (msrs_to_save[i]) {
 | 
			
		||||
		switch (msrs_to_save_all[i]) {
 | 
			
		||||
		case MSR_IA32_BNDCFGS:
 | 
			
		||||
			if (!kvm_mpx_supported())
 | 
			
		||||
				continue;
 | 
			
		||||
@ -5133,17 +5170,17 @@ static void kvm_init_msr_list(void)
 | 
			
		||||
			break;
 | 
			
		||||
		case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
 | 
			
		||||
			if (!kvm_x86_ops->pt_supported() ||
 | 
			
		||||
				msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
 | 
			
		||||
				msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
 | 
			
		||||
				intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
 | 
			
		||||
				continue;
 | 
			
		||||
			break;
 | 
			
		||||
		case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
 | 
			
		||||
			if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
 | 
			
		||||
			if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
 | 
			
		||||
			    min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
 | 
			
		||||
				continue;
 | 
			
		||||
			break;
 | 
			
		||||
		case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
 | 
			
		||||
			if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
 | 
			
		||||
			if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
 | 
			
		||||
			    min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
 | 
			
		||||
				continue;
 | 
			
		||||
		}
 | 
			
		||||
@ -5151,34 +5188,25 @@ static void kvm_init_msr_list(void)
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (j < i)
 | 
			
		||||
			msrs_to_save[j] = msrs_to_save[i];
 | 
			
		||||
		j++;
 | 
			
		||||
		msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
 | 
			
		||||
	}
 | 
			
		||||
	num_msrs_to_save = j;
 | 
			
		||||
 | 
			
		||||
	for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
 | 
			
		||||
		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
 | 
			
		||||
	for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
 | 
			
		||||
		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		if (j < i)
 | 
			
		||||
			emulated_msrs[j] = emulated_msrs[i];
 | 
			
		||||
		j++;
 | 
			
		||||
		emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
 | 
			
		||||
	}
 | 
			
		||||
	num_emulated_msrs = j;
 | 
			
		||||
 | 
			
		||||
	for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
 | 
			
		||||
	for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
 | 
			
		||||
		struct kvm_msr_entry msr;
 | 
			
		||||
 | 
			
		||||
		msr.index = msr_based_features[i];
 | 
			
		||||
		msr.index = msr_based_features_all[i];
 | 
			
		||||
		if (kvm_get_msr_feature(&msr))
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		if (j < i)
 | 
			
		||||
			msr_based_features[j] = msr_based_features[i];
 | 
			
		||||
		j++;
 | 
			
		||||
		msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
 | 
			
		||||
	}
 | 
			
		||||
	num_msr_based_features = j;
 | 
			
		||||
}
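kvm_init_msr_list() now builds msrs_to_save, emulated_msrs and msr_based_features as filtered copies of const *_all master tables instead of compacting the arrays in place. The copy-with-filter pattern, reduced to a self-contained sketch with made-up MSR numbers and a stand-in capability check:

/*
 * Pattern sketch for msrs_to_save_all -> msrs_to_save above.  The MSR
 * numbers and the "supported" test are arbitrary stand-ins for the
 * rdmsr_safe()/capability checks in the real function.
 */
#include <stdbool.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const unsigned int msrs_all[] = { 0x174, 0x175, 0x176, 0xc0000081 };
static unsigned int msrs[ARRAY_SIZE(msrs_all)];
static unsigned int num_msrs;

static bool host_supports(unsigned int msr)
{
	return msr != 0x176;                 /* arbitrary example filter */
}

int main(void)
{
	num_msrs = 0;
	for (unsigned int i = 0; i < ARRAY_SIZE(msrs_all); i++) {
		if (!host_supports(msrs_all[i]))
			continue;
		msrs[num_msrs++] = msrs_all[i];
	}
	for (unsigned int i = 0; i < num_msrs; i++)
		printf("exposed msr 0x%x\n", msrs[i]);
	return 0;
}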
 | 
			
		||||
 | 
			
		||||
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
 | 
			
		||||
@ -9428,6 +9456,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 | 
			
		||||
	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 | 
			
		||||
	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 | 
			
		||||
	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 | 
			
		||||
	INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
 | 
			
		||||
	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 | 
			
		||||
	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 | 
			
		||||
 | 
			
		||||
@ -9456,6 +9485,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 | 
			
		||||
	return kvm_x86_ops->vm_init(kvm);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int kvm_arch_post_init_vm(struct kvm *kvm)
 | 
			
		||||
{
 | 
			
		||||
	return kvm_mmu_post_init_vm(kvm);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 | 
			
		||||
{
 | 
			
		||||
	vcpu_load(vcpu);
 | 
			
		||||
@ -9557,6 +9591,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(x86_set_memory_region);
 | 
			
		||||
 | 
			
		||||
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
 | 
			
		||||
{
 | 
			
		||||
	kvm_mmu_pre_destroy_vm(kvm);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void kvm_arch_destroy_vm(struct kvm *kvm)
 | 
			
		||||
{
 | 
			
		||||
	if (current->mm == kvm->mm) {
 | 
			
		||||
 | 
			
		||||
@ -2713,6 +2713,28 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static
 | 
			
		||||
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 | 
			
		||||
{
 | 
			
		||||
	/*
 | 
			
		||||
	 * To prevent bfqq's service guarantees from being violated,
 | 
			
		||||
	 * bfqq may be left busy, i.e., queued for service, even if
 | 
			
		||||
	 * empty (see comments in __bfq_bfqq_expire() for
 | 
			
		||||
	 * details). But, if no process will send requests to bfqq any
 | 
			
		||||
	 * longer, then there is no point in keeping bfqq queued for
 | 
			
		||||
	 * service. In addition, keeping bfqq queued for service, but
 | 
			
		||||
	 * with no process ref any longer, may have caused bfqq to be
 | 
			
		||||
	 * freed when dequeued from service. But this is assumed to
 | 
			
		||||
	 * never happen.
 | 
			
		||||
	 */
 | 
			
		||||
	if (bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) &&
 | 
			
		||||
	    bfqq != bfqd->in_service_queue)
 | 
			
		||||
		bfq_del_bfqq_busy(bfqd, bfqq, false);
 | 
			
		||||
 | 
			
		||||
	bfq_put_queue(bfqq);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 | 
			
		||||
		struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
 | 
			
		||||
@ -2783,8 +2805,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 | 
			
		||||
	 */
 | 
			
		||||
	new_bfqq->pid = -1;
 | 
			
		||||
	bfqq->bic = NULL;
 | 
			
		||||
	/* release process reference to bfqq */
 | 
			
		||||
	bfq_put_queue(bfqq);
 | 
			
		||||
	bfq_release_process_ref(bfqd, bfqq);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
 | 
			
		||||
@ -4899,7 +4920,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 | 
			
		||||
 | 
			
		||||
	bfq_put_cooperator(bfqq);
 | 
			
		||||
 | 
			
		||||
	bfq_put_queue(bfqq); /* release process reference */
 | 
			
		||||
	bfq_release_process_ref(bfqd, bfqq);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
 | 
			
		||||
@ -5001,8 +5022,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
 | 
			
		||||
 | 
			
		||||
	bfqq = bic_to_bfqq(bic, false);
 | 
			
		||||
	if (bfqq) {
 | 
			
		||||
		/* release process reference on this queue */
 | 
			
		||||
		bfq_put_queue(bfqq);
 | 
			
		||||
		bfq_release_process_ref(bfqd, bfqq);
 | 
			
		||||
		bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
 | 
			
		||||
		bic_set_bfqq(bic, bfqq, false);
 | 
			
		||||
	}
 | 
			
		||||
@ -5963,7 +5983,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
 | 
			
		||||
 | 
			
		||||
	bfq_put_cooperator(bfqq);
 | 
			
		||||
 | 
			
		||||
	bfq_put_queue(bfqq);
 | 
			
		||||
	bfq_release_process_ref(bfqq->bfqd, bfqq);
 | 
			
		||||
	return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -751,7 +751,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
 | 
			
		||||
	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (bio->bi_vcnt > 0) {
 | 
			
		||||
	if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
 | 
			
		||||
		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 | 
			
		||||
 | 
			
		||||
		if (page_is_mergeable(bv, page, len, off, same_page)) {
 | 
			
		||||
 | 
			
		||||
@ -934,9 +934,14 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 | 
			
		||||
		int i;
 | 
			
		||||
		bool has_stats = false;
 | 
			
		||||
 | 
			
		||||
		spin_lock_irq(&blkg->q->queue_lock);
 | 
			
		||||
 | 
			
		||||
		if (!blkg->online)
 | 
			
		||||
			goto skip;
 | 
			
		||||
 | 
			
		||||
		dname = blkg_dev_name(blkg);
 | 
			
		||||
		if (!dname)
 | 
			
		||||
			continue;
 | 
			
		||||
			goto skip;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * Hooray string manipulation, count is the size written NOT
 | 
			
		||||
@ -946,8 +951,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 | 
			
		||||
		 */
 | 
			
		||||
		off += scnprintf(buf+off, size-off, "%s ", dname);
 | 
			
		||||
 | 
			
		||||
		spin_lock_irq(&blkg->q->queue_lock);
 | 
			
		||||
 | 
			
		||||
		blkg_rwstat_recursive_sum(blkg, NULL,
 | 
			
		||||
				offsetof(struct blkcg_gq, stat_bytes), &rwstat);
 | 
			
		||||
		rbytes = rwstat.cnt[BLKG_RWSTAT_READ];
 | 
			
		||||
@ -960,8 +963,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 | 
			
		||||
		wios = rwstat.cnt[BLKG_RWSTAT_WRITE];
 | 
			
		||||
		dios = rwstat.cnt[BLKG_RWSTAT_DISCARD];
 | 
			
		||||
 | 
			
		||||
		spin_unlock_irq(&blkg->q->queue_lock);
 | 
			
		||||
 | 
			
		||||
		if (rbytes || wbytes || rios || wios) {
 | 
			
		||||
			has_stats = true;
 | 
			
		||||
			off += scnprintf(buf+off, size-off,
 | 
			
		||||
@ -999,6 +1000,8 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 | 
			
		||||
				seq_commit(sf, -1);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	skip:
 | 
			
		||||
		spin_unlock_irq(&blkg->q->queue_lock);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
@ -1057,9 +1057,12 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
 | 
			
		||||
	atomic64_set(&iocg->active_period, cur_period);
 | 
			
		||||
 | 
			
		||||
	/* already activated or breaking leaf-only constraint? */
 | 
			
		||||
	for (i = iocg->level; i > 0; i--)
 | 
			
		||||
		if (!list_empty(&iocg->active_list))
 | 
			
		||||
	if (!list_empty(&iocg->active_list))
 | 
			
		||||
		goto succeed_unlock;
 | 
			
		||||
	for (i = iocg->level - 1; i > 0; i--)
 | 
			
		||||
		if (!list_empty(&iocg->ancestors[i]->active_list))
 | 
			
		||||
			goto fail_unlock;
 | 
			
		||||
 | 
			
		||||
	if (iocg->child_active_sum)
 | 
			
		||||
		goto fail_unlock;
 | 
			
		||||
 | 
			
		||||
@ -1101,6 +1104,7 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
 | 
			
		||||
		ioc_start_period(ioc, now);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
succeed_unlock:
 | 
			
		||||
	spin_unlock_irq(&ioc->lock);
 | 
			
		||||
	return true;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -554,12 +554,27 @@ ssize_t __weak cpu_show_mds(struct device *dev,
	return sprintf(buf, "Not affected\n");
}

ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	return sprintf(buf, "Not affected\n");
}

ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "Not affected\n");
}

static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);

static struct attribute *cpu_root_vulnerabilities_attrs[] = {
	&dev_attr_meltdown.attr,
@@ -568,6 +583,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
	&dev_attr_spec_store_bypass.attr,
	&dev_attr_l1tf.attr,
	&dev_attr_mds.attr,
	&dev_attr_tsx_async_abort.attr,
	&dev_attr_itlb_multihit.attr,
	NULL
};

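With the two attributes registered, the mitigation state is readable from /sys/devices/system/cpu/vulnerabilities/tsx_async_abort and the neighbouring itlb_multihit file like the existing entries. A minimal reader, as a userspace sketch:

/*
 * Userspace sketch: read the new vulnerability files as ordinary sysfs
 * text files.  Error handling is intentionally minimal.
 */
#include <stdio.h>

static void show(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	if (fgets(line, sizeof(line), f))
		printf("%s: %s", path, line);
	fclose(f);
}

int main(void)
{
	show("/sys/devices/system/cpu/vulnerabilities/tsx_async_abort");
	show("/sys/devices/system/cpu/vulnerabilities/itlb_multihit");
	return 0;
}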
@ -872,3 +872,39 @@ int walk_memory_blocks(unsigned long start, unsigned long size,
 | 
			
		||||
	}
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct for_each_memory_block_cb_data {
 | 
			
		||||
	walk_memory_blocks_func_t func;
 | 
			
		||||
	void *arg;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static int for_each_memory_block_cb(struct device *dev, void *data)
 | 
			
		||||
{
 | 
			
		||||
	struct memory_block *mem = to_memory_block(dev);
 | 
			
		||||
	struct for_each_memory_block_cb_data *cb_data = data;
 | 
			
		||||
 | 
			
		||||
	return cb_data->func(mem, cb_data->arg);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * for_each_memory_block - walk through all present memory blocks
 | 
			
		||||
 *
 | 
			
		||||
 * @arg: argument passed to func
 | 
			
		||||
 * @func: callback for each memory block walked
 | 
			
		||||
 *
 | 
			
		||||
 * This function walks through all present memory blocks, calling func on
 | 
			
		||||
 * each memory block.
 | 
			
		||||
 *
 | 
			
		||||
 * In case func() returns an error, walking is aborted and the error is
 | 
			
		||||
 * returned.
 | 
			
		||||
 */
 | 
			
		||||
int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
 | 
			
		||||
{
 | 
			
		||||
	struct for_each_memory_block_cb_data cb_data = {
 | 
			
		||||
		.func = func,
 | 
			
		||||
		.arg = arg,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
 | 
			
		||||
				for_each_memory_block_cb);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -786,7 +786,6 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm
 | 
			
		||||
 | 
			
		||||
	if (nc->tentative && connection->agreed_pro_version < 92) {
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
		mutex_unlock(&sock->mutex);
 | 
			
		||||
		drbd_err(connection, "--dry-run is not supported by peer");
 | 
			
		||||
		return -EOPNOTSUPP;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@ -2087,7 +2087,7 @@ static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
 | 
			
		||||
	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
 | 
			
		||||
	struct ceph_osd_data *osd_data;
 | 
			
		||||
	u64 objno;
 | 
			
		||||
	u8 state, new_state, current_state;
 | 
			
		||||
	u8 state, new_state, uninitialized_var(current_state);
 | 
			
		||||
	bool has_current_state;
 | 
			
		||||
	void *p;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1000,8 +1000,10 @@ static void rsxx_pci_remove(struct pci_dev *dev)
 | 
			
		||||
 | 
			
		||||
	cancel_work_sync(&card->event_work);
 | 
			
		||||
 | 
			
		||||
	destroy_workqueue(card->event_wq);
 | 
			
		||||
	rsxx_destroy_dev(card);
 | 
			
		||||
	rsxx_dma_destroy(card);
 | 
			
		||||
	destroy_workqueue(card->creg_ctrl.creg_wq);
 | 
			
		||||
 | 
			
		||||
	spin_lock_irqsave(&card->irq_lock, flags);
 | 
			
		||||
	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
 | 
			
		||||
 | 
			
		||||
@ -13,7 +13,6 @@
 | 
			
		||||
#include <linux/delay.h>
 | 
			
		||||
#include <linux/device.h>
 | 
			
		||||
#include <linux/err.h>
 | 
			
		||||
#include <linux/freezer.h>
 | 
			
		||||
#include <linux/fs.h>
 | 
			
		||||
#include <linux/hw_random.h>
 | 
			
		||||
#include <linux/kernel.h>
 | 
			
		||||
@ -422,9 +421,7 @@ static int hwrng_fillfn(void *unused)
 | 
			
		||||
{
 | 
			
		||||
	long rc;
 | 
			
		||||
 | 
			
		||||
	set_freezable();
 | 
			
		||||
 | 
			
		||||
	while (!kthread_freezable_should_stop(NULL)) {
 | 
			
		||||
	while (!kthread_should_stop()) {
 | 
			
		||||
		struct hwrng *rng;
 | 
			
		||||
 | 
			
		||||
		rng = get_current_rng();
 | 
			
		||||
 | 
			
		||||
@ -327,7 +327,6 @@
 | 
			
		||||
#include <linux/percpu.h>
 | 
			
		||||
#include <linux/cryptohash.h>
 | 
			
		||||
#include <linux/fips.h>
 | 
			
		||||
#include <linux/freezer.h>
 | 
			
		||||
#include <linux/ptrace.h>
 | 
			
		||||
#include <linux/workqueue.h>
 | 
			
		||||
#include <linux/irq.h>
 | 
			
		||||
@ -2500,8 +2499,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
 | 
			
		||||
	 * We'll be woken up again once below random_write_wakeup_thresh,
 | 
			
		||||
	 * or when the calling thread is about to terminate.
 | 
			
		||||
	 */
 | 
			
		||||
	wait_event_freezable(random_write_wait,
 | 
			
		||||
			kthread_should_stop() ||
 | 
			
		||||
	wait_event_interruptible(random_write_wait, kthread_should_stop() ||
 | 
			
		||||
			ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
 | 
			
		||||
	mix_pool_bytes(poolp, buffer, count);
 | 
			
		||||
	credit_entropy_bits(poolp, entropy);
 | 
			
		||||
 | 
			
		||||
@ -297,7 +297,10 @@ static int clk_main_probe_frequency(struct regmap *regmap)
 | 
			
		||||
		regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
 | 
			
		||||
		if (mcfr & AT91_PMC_MAINRDY)
 | 
			
		||||
			return 0;
 | 
			
		||||
		usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT);
 | 
			
		||||
		if (system_state < SYSTEM_RUNNING)
 | 
			
		||||
			udelay(MAINF_LOOP_MIN_WAIT);
 | 
			
		||||
		else
 | 
			
		||||
			usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT);
 | 
			
		||||
	} while (time_before(prep_time, timeout));
 | 
			
		||||
 | 
			
		||||
	return -ETIMEDOUT;
 | 
			
		||||
 | 
			
		||||
@ -43,6 +43,7 @@ static const struct clk_pll_characteristics upll_characteristics = {
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static const struct clk_programmable_layout sam9x60_programmable_layout = {
 | 
			
		||||
	.pres_mask = 0xff,
 | 
			
		||||
	.pres_shift = 8,
 | 
			
		||||
	.css_mask = 0x1f,
 | 
			
		||||
	.have_slck_mck = 0,
 | 
			
		||||
 | 
			
		||||
@ -76,7 +76,10 @@ static int clk_slow_osc_prepare(struct clk_hw *hw)
 | 
			
		||||
 | 
			
		||||
	writel(tmp | osc->bits->cr_osc32en, sckcr);
 | 
			
		||||
 | 
			
		||||
	usleep_range(osc->startup_usec, osc->startup_usec + 1);
 | 
			
		||||
	if (system_state < SYSTEM_RUNNING)
 | 
			
		||||
		udelay(osc->startup_usec);
 | 
			
		||||
	else
 | 
			
		||||
		usleep_range(osc->startup_usec, osc->startup_usec + 1);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
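The clk-at91 changes swap an unconditional usleep_range() for a system_state check because sleeping waits cannot be used before the scheduler is fully up; early in boot the driver must busy-wait instead. A userspace model of that choice, with the delay primitives stubbed out and the enum values assumed:

/*
 * Illustrative model only: before SYSTEM_RUNNING the wait busy-loops
 * (udelay), afterwards it sleeps (usleep_range).  The enum values and
 * the printing stubs are assumptions for the example.
 */
#include <stdio.h>

enum system_states { SYSTEM_BOOTING, SYSTEM_SCHEDULING, SYSTEM_RUNNING };
static enum system_states system_state = SYSTEM_BOOTING;

static void udelay(unsigned long usecs)
{
	printf("busy-wait %lu us\n", usecs);
}

static void usleep_range(unsigned long min, unsigned long max)
{
	printf("sleep %lu..%lu us\n", min, max);
}

static void startup_delay(unsigned long usecs)
{
	if (system_state < SYSTEM_RUNNING)
		udelay(usecs);
	else
		usleep_range(usecs, usecs + 1);
}

int main(void)
{
	startup_delay(1200);                 /* early boot: busy-wait */
	system_state = SYSTEM_RUNNING;
	startup_delay(1200);                 /* normal operation: sleep */
	return 0;
}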
@ -187,7 +190,10 @@ static int clk_slow_rc_osc_prepare(struct clk_hw *hw)
 | 
			
		||||
 | 
			
		||||
	writel(readl(sckcr) | osc->bits->cr_rcen, sckcr);
 | 
			
		||||
 | 
			
		||||
	usleep_range(osc->startup_usec, osc->startup_usec + 1);
 | 
			
		||||
	if (system_state < SYSTEM_RUNNING)
 | 
			
		||||
		udelay(osc->startup_usec);
 | 
			
		||||
	else
 | 
			
		||||
		usleep_range(osc->startup_usec, osc->startup_usec + 1);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
@ -288,7 +294,10 @@ static int clk_sam9x5_slow_set_parent(struct clk_hw *hw, u8 index)
 | 
			
		||||
 | 
			
		||||
	writel(tmp, sckcr);
 | 
			
		||||
 | 
			
		||||
	usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1);
 | 
			
		||||
	if (system_state < SYSTEM_RUNNING)
 | 
			
		||||
		udelay(SLOWCK_SW_TIME_USEC);
 | 
			
		||||
	else
 | 
			
		||||
		usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
@ -533,7 +542,10 @@ static int clk_sama5d4_slow_osc_prepare(struct clk_hw *hw)
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	usleep_range(osc->startup_usec, osc->startup_usec + 1);
 | 
			
		||||
	if (system_state < SYSTEM_RUNNING)
 | 
			
		||||
		udelay(osc->startup_usec);
 | 
			
		||||
	else
 | 
			
		||||
		usleep_range(osc->startup_usec, osc->startup_usec + 1);
 | 
			
		||||
	osc->prepared = true;
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
 | 
			
		||||
@ -266,10 +266,11 @@ static int aspeed_g6_clk_enable(struct clk_hw *hw)
 | 
			
		||||
 | 
			
		||||
	/* Enable clock */
 | 
			
		||||
	if (gate->flags & CLK_GATE_SET_TO_DISABLE) {
 | 
			
		||||
		regmap_write(gate->map, get_clock_reg(gate), clk);
 | 
			
		||||
	} else {
 | 
			
		||||
		/* Use set to clear register */
 | 
			
		||||
		/* Clock is clear to enable, so use set to clear register */
 | 
			
		||||
		regmap_write(gate->map, get_clock_reg(gate) + 0x04, clk);
 | 
			
		||||
	} else {
 | 
			
		||||
		/* Clock is set to enable, so use write to set register */
 | 
			
		||||
		regmap_write(gate->map, get_clock_reg(gate), clk);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (gate->reset_idx >= 0) {
 | 
			
		||||
 | 
			
		||||
@ -638,7 +638,7 @@ static int imx8mm_clocks_probe(struct platform_device *pdev)
 | 
			
		||||
					   clks[IMX8MM_CLK_A53_DIV],
 | 
			
		||||
					   clks[IMX8MM_CLK_A53_SRC],
 | 
			
		||||
					   clks[IMX8MM_ARM_PLL_OUT],
 | 
			
		||||
					   clks[IMX8MM_CLK_24M]);
 | 
			
		||||
					   clks[IMX8MM_SYS_PLL1_800M]);
 | 
			
		||||
 | 
			
		||||
	imx_check_clocks(clks, ARRAY_SIZE(clks));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -610,7 +610,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
 | 
			
		||||
					   clks[IMX8MN_CLK_A53_DIV],
 | 
			
		||||
					   clks[IMX8MN_CLK_A53_SRC],
 | 
			
		||||
					   clks[IMX8MN_ARM_PLL_OUT],
 | 
			
		||||
					   clks[IMX8MN_CLK_24M]);
 | 
			
		||||
					   clks[IMX8MN_SYS_PLL1_800M]);
 | 
			
		||||
 | 
			
		||||
	imx_check_clocks(clks, ARRAY_SIZE(clks));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -343,6 +343,7 @@ static struct clk_regmap g12a_cpu_clk_premux0 = {
 | 
			
		||||
		.offset = HHI_SYS_CPU_CLK_CNTL0,
 | 
			
		||||
		.mask = 0x3,
 | 
			
		||||
		.shift = 0,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpu_clk_dyn0_sel",
 | 
			
		||||
@ -353,8 +354,7 @@ static struct clk_regmap g12a_cpu_clk_premux0 = {
 | 
			
		||||
			{ .hw = &g12a_fclk_div3.hw },
 | 
			
		||||
		},
 | 
			
		||||
		.num_parents = 3,
 | 
			
		||||
		/* This sub-tree is used a parking clock */
 | 
			
		||||
		.flags = CLK_SET_RATE_NO_REPARENT,
 | 
			
		||||
		.flags = CLK_SET_RATE_PARENT,
 | 
			
		||||
	},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -410,6 +410,7 @@ static struct clk_regmap g12a_cpu_clk_postmux0 = {
 | 
			
		||||
		.offset = HHI_SYS_CPU_CLK_CNTL0,
 | 
			
		||||
		.mask = 0x1,
 | 
			
		||||
		.shift = 2,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpu_clk_dyn0",
 | 
			
		||||
@ -466,6 +467,7 @@ static struct clk_regmap g12a_cpu_clk_dyn = {
 | 
			
		||||
		.offset = HHI_SYS_CPU_CLK_CNTL0,
 | 
			
		||||
		.mask = 0x1,
 | 
			
		||||
		.shift = 10,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpu_clk_dyn",
 | 
			
		||||
@ -485,6 +487,7 @@ static struct clk_regmap g12a_cpu_clk = {
 | 
			
		||||
		.offset = HHI_SYS_CPU_CLK_CNTL0,
 | 
			
		||||
		.mask = 0x1,
 | 
			
		||||
		.shift = 11,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpu_clk",
 | 
			
		||||
@ -504,6 +507,7 @@ static struct clk_regmap g12b_cpu_clk = {
 | 
			
		||||
		.offset = HHI_SYS_CPU_CLK_CNTL0,
 | 
			
		||||
		.mask = 0x1,
 | 
			
		||||
		.shift = 11,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpu_clk",
 | 
			
		||||
@ -523,6 +527,7 @@ static struct clk_regmap g12b_cpub_clk_premux0 = {
 | 
			
		||||
		.offset = HHI_SYS_CPUB_CLK_CNTL,
 | 
			
		||||
		.mask = 0x3,
 | 
			
		||||
		.shift = 0,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpub_clk_dyn0_sel",
 | 
			
		||||
@ -533,6 +538,7 @@ static struct clk_regmap g12b_cpub_clk_premux0 = {
 | 
			
		||||
			{ .hw = &g12a_fclk_div3.hw },
 | 
			
		||||
		},
 | 
			
		||||
		.num_parents = 3,
 | 
			
		||||
		.flags = CLK_SET_RATE_PARENT,
 | 
			
		||||
	},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -567,6 +573,7 @@ static struct clk_regmap g12b_cpub_clk_postmux0 = {
 | 
			
		||||
		.offset = HHI_SYS_CPUB_CLK_CNTL,
 | 
			
		||||
		.mask = 0x1,
 | 
			
		||||
		.shift = 2,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpub_clk_dyn0",
 | 
			
		||||
@ -644,6 +651,7 @@ static struct clk_regmap g12b_cpub_clk_dyn = {
 | 
			
		||||
		.offset = HHI_SYS_CPUB_CLK_CNTL,
 | 
			
		||||
		.mask = 0x1,
 | 
			
		||||
		.shift = 10,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpub_clk_dyn",
 | 
			
		||||
@ -663,6 +671,7 @@ static struct clk_regmap g12b_cpub_clk = {
 | 
			
		||||
		.offset = HHI_SYS_CPUB_CLK_CNTL,
 | 
			
		||||
		.mask = 0x1,
 | 
			
		||||
		.shift = 11,
 | 
			
		||||
		.flags = CLK_MUX_ROUND_CLOSEST,
 | 
			
		||||
	},
 | 
			
		||||
	.hw.init = &(struct clk_init_data){
 | 
			
		||||
		.name = "cpub_clk",
 | 
			
		||||
 | 
			
		||||
@ -935,6 +935,7 @@ static struct clk_regmap gxbb_sar_adc_clk_div = {
 | 
			
		||||
			&gxbb_sar_adc_clk_sel.hw
 | 
			
		||||
		},
 | 
			
		||||
		.num_parents = 1,
 | 
			
		||||
		.flags = CLK_SET_RATE_PARENT,
 | 
			
		||||
	},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -165,12 +165,18 @@ static const unsigned long exynos5x_clk_regs[] __initconst = {
 | 
			
		||||
	GATE_BUS_CPU,
 | 
			
		||||
	GATE_SCLK_CPU,
 | 
			
		||||
	CLKOUT_CMU_CPU,
 | 
			
		||||
	CPLL_CON0,
 | 
			
		||||
	DPLL_CON0,
 | 
			
		||||
	EPLL_CON0,
 | 
			
		||||
	EPLL_CON1,
 | 
			
		||||
	EPLL_CON2,
 | 
			
		||||
	RPLL_CON0,
 | 
			
		||||
	RPLL_CON1,
 | 
			
		||||
	RPLL_CON2,
 | 
			
		||||
	IPLL_CON0,
 | 
			
		||||
	SPLL_CON0,
 | 
			
		||||
	VPLL_CON0,
 | 
			
		||||
	MPLL_CON0,
 | 
			
		||||
	SRC_TOP0,
 | 
			
		||||
	SRC_TOP1,
 | 
			
		||||
	SRC_TOP2,
 | 
			
		||||
@ -1172,8 +1178,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = {
 | 
			
		||||
	GATE(CLK_SCLK_ISP_SENSOR2, "sclk_isp_sensor2", "dout_isp_sensor2",
 | 
			
		||||
			GATE_TOP_SCLK_ISP, 12, CLK_SET_RATE_PARENT, 0),
 | 
			
		||||
 | 
			
		||||
	GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0),
 | 
			
		||||
 | 
			
		||||
	/* CDREX */
 | 
			
		||||
	GATE(CLK_CLKM_PHY0, "clkm_phy0", "dout_sclk_cdrex",
 | 
			
		||||
			GATE_BUS_CDREX0, 0, 0, 0),
 | 
			
		||||
@ -1248,6 +1252,15 @@ static struct exynos5_subcmu_reg_dump exynos5x_gsc_suspend_regs[] = {
 | 
			
		||||
	{ DIV2_RATIO0, 0, 0x30 },	/* DIV dout_gscl_blk_300 */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static const struct samsung_gate_clock exynos5x_g3d_gate_clks[] __initconst = {
 | 
			
		||||
	GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0),
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static struct exynos5_subcmu_reg_dump exynos5x_g3d_suspend_regs[] = {
 | 
			
		||||
	{ GATE_IP_G3D, 0x3ff, 0x3ff },	/* G3D gates */
 | 
			
		||||
	{ SRC_TOP5, 0, BIT(16) },	/* MUX mout_user_aclk_g3d */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static const struct samsung_div_clock exynos5x_mfc_div_clks[] __initconst = {
 | 
			
		||||
	DIV(0, "dout_mfc_blk", "mout_user_aclk333", DIV4_RATIO, 0, 2),
 | 
			
		||||
};
 | 
			
		||||
@ -1320,6 +1333,14 @@ static const struct exynos5_subcmu_info exynos5x_gsc_subcmu = {
 | 
			
		||||
	.pd_name	= "GSC",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static const struct exynos5_subcmu_info exynos5x_g3d_subcmu = {
 | 
			
		||||
	.gate_clks	= exynos5x_g3d_gate_clks,
 | 
			
		||||
	.nr_gate_clks	= ARRAY_SIZE(exynos5x_g3d_gate_clks),
 | 
			
		||||
	.suspend_regs	= exynos5x_g3d_suspend_regs,
 | 
			
		||||
	.nr_suspend_regs = ARRAY_SIZE(exynos5x_g3d_suspend_regs),
 | 
			
		||||
	.pd_name	= "G3D",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static const struct exynos5_subcmu_info exynos5x_mfc_subcmu = {
 | 
			
		||||
	.div_clks	= exynos5x_mfc_div_clks,
 | 
			
		||||
	.nr_div_clks	= ARRAY_SIZE(exynos5x_mfc_div_clks),
 | 
			
		||||
@ -1351,6 +1372,7 @@ static const struct exynos5_subcmu_info exynos5800_mau_subcmu = {
 | 
			
		||||
static const struct exynos5_subcmu_info *exynos5x_subcmus[] = {
 | 
			
		||||
	&exynos5x_disp_subcmu,
 | 
			
		||||
	&exynos5x_gsc_subcmu,
 | 
			
		||||
	&exynos5x_g3d_subcmu,
 | 
			
		||||
	&exynos5x_mfc_subcmu,
 | 
			
		||||
	&exynos5x_mscl_subcmu,
 | 
			
		||||
};
 | 
			
		||||
@ -1358,6 +1380,7 @@ static const struct exynos5_subcmu_info *exynos5x_subcmus[] = {
 | 
			
		||||
static const struct exynos5_subcmu_info *exynos5800_subcmus[] = {
 | 
			
		||||
	&exynos5x_disp_subcmu,
 | 
			
		||||
	&exynos5x_gsc_subcmu,
 | 
			
		||||
	&exynos5x_g3d_subcmu,
 | 
			
		||||
	&exynos5x_mfc_subcmu,
 | 
			
		||||
	&exynos5x_mscl_subcmu,
 | 
			
		||||
	&exynos5800_mau_subcmu,
 | 
			
		||||
 | 
			
		||||
@ -13,6 +13,7 @@
 | 
			
		||||
#include <linux/of_device.h>
 | 
			
		||||
#include <linux/platform_device.h>
 | 
			
		||||
#include <linux/pm_runtime.h>
 | 
			
		||||
#include <linux/slab.h>
 | 
			
		||||
 | 
			
		||||
#include <dt-bindings/clock/exynos5433.h>
 | 
			
		||||
 | 
			
		||||
@ -5584,6 +5585,8 @@ static int __init exynos5433_cmu_probe(struct platform_device *pdev)
 | 
			
		||||
 | 
			
		||||
	data->clk_save = samsung_clk_alloc_reg_dump(info->clk_regs,
 | 
			
		||||
						    info->nr_clk_regs);
 | 
			
		||||
	if (!data->clk_save)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
	data->nr_clk_save = info->nr_clk_regs;
 | 
			
		||||
	data->clk_suspend = info->suspend_regs;
 | 
			
		||||
	data->nr_clk_suspend = info->nr_suspend_regs;
 | 
			
		||||
@ -5592,12 +5595,19 @@ static int __init exynos5433_cmu_probe(struct platform_device *pdev)
 | 
			
		||||
	if (data->nr_pclks > 0) {
 | 
			
		||||
		data->pclks = devm_kcalloc(dev, sizeof(struct clk *),
 | 
			
		||||
					   data->nr_pclks, GFP_KERNEL);
 | 
			
		||||
 | 
			
		||||
		if (!data->pclks) {
 | 
			
		||||
			kfree(data->clk_save);
 | 
			
		||||
			return -ENOMEM;
 | 
			
		||||
		}
 | 
			
		||||
		for (i = 0; i < data->nr_pclks; i++) {
 | 
			
		||||
			struct clk *clk = of_clk_get(dev->of_node, i);
 | 
			
		||||
 | 
			
		||||
			if (IS_ERR(clk))
 | 
			
		||||
			if (IS_ERR(clk)) {
 | 
			
		||||
				kfree(data->clk_save);
 | 
			
		||||
				while (--i >= 0)
 | 
			
		||||
					clk_put(data->pclks[i]);
 | 
			
		||||
				return PTR_ERR(clk);
 | 
			
		||||
			}
 | 
			
		||||
			data->pclks[i] = clk;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@ -1224,7 +1224,7 @@ static int sun9i_a80_ccu_probe(struct platform_device *pdev)
 | 
			
		||||
 | 
			
		||||
	/* Enforce d1 = 0, d2 = 0 for Audio PLL */
 | 
			
		||||
	val = readl(reg + SUN9I_A80_PLL_AUDIO_REG);
 | 
			
		||||
	val &= (BIT(16) & BIT(18));
 | 
			
		||||
	val &= ~(BIT(16) | BIT(18));
 | 
			
		||||
	writel(val, reg + SUN9I_A80_PLL_AUDIO_REG);
 | 
			
		||||
 | 
			
		||||
	/* Enforce P = 1 for both CPU cluster PLLs */
 | 
			
		||||
 | 
			
		||||
@ -1080,8 +1080,8 @@ static struct clk ** __init sunxi_divs_clk_setup(struct device_node *node,
 | 
			
		||||
						 rate_hw, rate_ops,
 | 
			
		||||
						 gate_hw, &clk_gate_ops,
 | 
			
		||||
						 clkflags |
 | 
			
		||||
						 data->div[i].critical ?
 | 
			
		||||
							CLK_IS_CRITICAL : 0);
 | 
			
		||||
						 (data->div[i].critical ?
 | 
			
		||||
							CLK_IS_CRITICAL : 0));
 | 
			
		||||
 | 
			
		||||
		WARN_ON(IS_ERR(clk_data->clks[i]));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@ -174,7 +174,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node)
 | 
			
		||||
	struct clk_init_data init = { NULL };
 | 
			
		||||
	const char **parent_names = NULL;
 | 
			
		||||
	struct clk *clk;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
 | 
			
		||||
	if (!clk_hw) {
 | 
			
		||||
@ -207,11 +206,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node)
 | 
			
		||||
	clk = ti_clk_register(NULL, &clk_hw->hw, node->name);
 | 
			
		||||
 | 
			
		||||
	if (!IS_ERR(clk)) {
 | 
			
		||||
		ret = ti_clk_add_alias(NULL, clk, node->name);
 | 
			
		||||
		if (ret) {
 | 
			
		||||
			clk_unregister(clk);
 | 
			
		||||
			goto cleanup;
 | 
			
		||||
		}
 | 
			
		||||
		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 | 
			
		||||
		kfree(parent_names);
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
@ -100,11 +100,12 @@ static bool _omap4_is_timeout(union omap4_timeout *time, u32 timeout)
 | 
			
		||||
	 * can be from a timer that requires pm_runtime access, which
 | 
			
		||||
	 * will eventually bring us here with timekeeping_suspended,
 | 
			
		||||
	 * during both suspend entry and resume paths. This happens
 | 
			
		||||
	 * at least on am43xx platform.
 | 
			
		||||
	 * at least on am43xx platform. Account for flakeyness
 | 
			
		||||
	 * with udelay() by multiplying the timeout value by 2.
 | 
			
		||||
	 */
 | 
			
		||||
	if (unlikely(_early_timeout || timekeeping_suspended)) {
 | 
			
		||||
		if (time->cycles++ < timeout) {
 | 
			
		||||
			udelay(1);
 | 
			
		||||
			udelay(1 * 2);
 | 
			
		||||
			return false;
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
 | 
			
		||||
@@ -328,12 +328,13 @@ static int sh_mtu2_register(struct sh_mtu2_channel *ch, const char *name)
 	return 0;
 }
 
+static const unsigned int sh_mtu2_channel_offsets[] = {
+	0x300, 0x380, 0x000,
+};
+
 static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index,
 				 struct sh_mtu2_device *mtu)
 {
-	static const unsigned int channel_offsets[] = {
-		0x300, 0x380, 0x000,
-	};
 	char name[6];
 	int irq;
 	int ret;
@@ -356,7 +357,7 @@ static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index,
 		return ret;
 	}
 
-	ch->base = mtu->mapbase + channel_offsets[index];
+	ch->base = mtu->mapbase + sh_mtu2_channel_offsets[index];
 	ch->index = index;
 
 	return sh_mtu2_register(ch, dev_name(&mtu->pdev->dev));
@@ -408,7 +409,12 @@ static int sh_mtu2_setup(struct sh_mtu2_device *mtu,
 	}
 
 	/* Allocate and setup the channels. */
-	mtu->num_channels = 3;
+	ret = platform_irq_count(pdev);
+	if (ret < 0)
+		goto err_unmap;
+
+	mtu->num_channels = min_t(unsigned int, ret,
+				  ARRAY_SIZE(sh_mtu2_channel_offsets));
 
 	mtu->channels = kcalloc(mtu->num_channels, sizeof(*mtu->channels),
 				GFP_KERNEL);
@@ -268,15 +268,12 @@ static int __init mtk_syst_init(struct device_node *node)
 
 	ret = timer_of_init(node, &to);
 	if (ret)
-		goto err;
+		return ret;
 
 	clockevents_config_and_register(&to.clkevt, timer_of_rate(&to),
 					TIMER_SYNC_TICKS, 0xffffffff);
 
 	return 0;
-err:
-	timer_of_cleanup(&to);
-	return ret;
 }
 
 static int __init mtk_gpt_init(struct device_node *node)
@@ -293,7 +290,7 @@ static int __init mtk_gpt_init(struct device_node *node)
 
 	ret = timer_of_init(node, &to);
 	if (ret)
-		goto err;
+		return ret;
 
 	/* Configure clock source */
 	mtk_gpt_setup(&to, TIMER_CLK_SRC, GPT_CTRL_OP_FREERUN);
@@ -311,9 +308,6 @@ static int __init mtk_gpt_init(struct device_node *node)
 	mtk_gpt_enable_irq(&to, TIMER_CLK_EVT);
 
 	return 0;
-err:
-	timer_of_cleanup(&to);
-	return ret;
 }
 TIMER_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_gpt_init);
 TIMER_OF_DECLARE(mtk_mt6765, "mediatek,mt6765-timer", mtk_syst_init);
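The direction of this change follows from who owns the cleanup: when timer_of_init() fails it has already undone its own partial setup, so jumping to a label that runs timer_of_cleanup() on the same timer_of would release resources a second time; returning the error directly is the safe path. A small sketch of that general rule, with hypothetical names rather than the timer_of API:

#include <stdio.h>

struct resource_set { int acquired; };

/* Hypothetical init helper that cleans up after itself on failure. */
static int thing_init(struct resource_set *r)
{
	r->acquired = 1;
	/* ... a later step of init fails ... */
	r->acquired = 0;	/* init releases what it took before returning */
	return -1;
}

static void thing_cleanup(struct resource_set *r)
{
	/* would "release" the resources a second time if run after a failed init */
	r->acquired = 0;
}

static int caller(struct resource_set *r)
{
	int ret = thing_init(r);

	if (ret)
		return ret;	/* correct: do not call thing_cleanup() here */

	/* ... use the resources ... */
	thing_cleanup(r);
	return 0;
}

int main(void)
{
	struct resource_set r = { 0 };

	printf("caller() = %d\n", caller(&r));
	return 0;
}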
@@ -847,11 +847,9 @@ static void intel_pstate_hwp_force_min_perf(int cpu)
 	value |= HWP_MAX_PERF(min_perf);
 	value |= HWP_MIN_PERF(min_perf);
 
-	/* Set EPP/EPB to min */
+	/* Set EPP to min */
 	if (boot_cpu_has(X86_FEATURE_HWP_EPP))
 		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
-	else
-		intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE);
 
 	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
@@ -362,9 +362,8 @@ static void mrfld_irq_handler(struct irq_desc *desc)
 	chained_irq_exit(irqchip, desc);
 }
 
-static int mrfld_irq_init_hw(struct gpio_chip *chip)
+static void mrfld_irq_init_hw(struct mrfld_gpio *priv)
 {
-	struct mrfld_gpio *priv = gpiochip_get_data(chip);
 	void __iomem *reg;
 	unsigned int base;
 
@@ -376,8 +375,6 @@ static int mrfld_irq_init_hw(struct gpio_chip *chip)
 		reg = gpio_reg(&priv->chip, base, GFER);
 		writel(0, reg);
 	}
-
-	return 0;
 }
 
 static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv)
@@ -400,7 +397,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
 {
 	const struct mrfld_gpio_pinrange *range;
 	const char *pinctrl_dev_name;
-	struct gpio_irq_chip *girq;
 	struct mrfld_gpio *priv;
 	u32 gpio_base, irq_base;
 	void __iomem *base;
@@ -448,21 +444,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
 
 	raw_spin_lock_init(&priv->lock);
 
-	girq = &priv->chip.irq;
-	girq->chip = &mrfld_irqchip;
-	girq->init_hw = mrfld_irq_init_hw;
-	girq->parent_handler = mrfld_irq_handler;
-	girq->num_parents = 1;
-	girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
-				     sizeof(*girq->parents),
-				     GFP_KERNEL);
-	if (!girq->parents)
-		return -ENOMEM;
-	girq->parents[0] = pdev->irq;
-	girq->first = irq_base;
-	girq->default_type = IRQ_TYPE_NONE;
-	girq->handler = handle_bad_irq;
-
 	pci_set_drvdata(pdev, priv);
 	retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
 	if (retval) {
@@ -484,6 +465,18 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
 		}
 	}
 
+	retval = gpiochip_irqchip_add(&priv->chip, &mrfld_irqchip, irq_base,
+				      handle_bad_irq, IRQ_TYPE_NONE);
+	if (retval) {
+		dev_err(&pdev->dev, "could not connect irqchip to gpiochip\n");
+		return retval;
+	}
+
+	mrfld_irq_init_hw(priv);
+
+	gpiochip_set_chained_irqchip(&priv->chip, &mrfld_irqchip, pdev->irq,
+				     mrfld_irq_handler);
+
 	return 0;
 }
@@ -604,8 +604,11 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++)
+		for (i = 0; i < num_entities; i++) {
+			mutex_lock(&ctx->adev->lock_reset);
 			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+			mutex_unlock(&ctx->adev->lock_reset);
+		}
 	}
 }
@@ -2885,6 +2885,13 @@ fence_driver_init:
 			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
 	}
 
+	/*
+	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
+	 * Otherwise the mgpu fan boost feature will be skipped due to the
+	 * gpu instance is counted less.
+	 */
+	amdgpu_register_gpu_instance(adev);
+
 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
 	 * explicit gating rather than handling it automatically.
 	 */
@@ -1016,6 +1016,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
 
 	/* Renoir */
 	{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
@@ -289,6 +289,7 @@ struct amdgpu_gfx {
 	uint32_t			mec2_feature_version;
 	bool				mec_fw_write_wait;
 	bool				me_fw_write_wait;
+	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
@@ -190,7 +190,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 		pm_runtime_put_autosuspend(dev->dev);
 	}
 
-	amdgpu_register_gpu_instance(adev);
 out:
 	if (r) {
 		/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
@@ -950,21 +950,7 @@ static void psp_print_fw_hdr(struct psp_context *psp,
 			     struct amdgpu_firmware_info *ucode)
 {
 	struct amdgpu_device *adev = psp->adev;
-	const struct sdma_firmware_header_v1_0 *sdma_hdr =
-		(const struct sdma_firmware_header_v1_0 *)
-		adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
-	const struct gfx_firmware_header_v1_0 *ce_hdr =
-		(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
-	const struct gfx_firmware_header_v1_0 *pfp_hdr =
-		(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
-	const struct gfx_firmware_header_v1_0 *me_hdr =
-		(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
-	const struct gfx_firmware_header_v1_0 *mec_hdr =
-		(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
-	const struct rlc_firmware_header_v2_0 *rlc_hdr =
-		(const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
-	const struct smc_firmware_header_v1_0 *smc_hdr =
-		(const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
+	struct common_firmware_header *hdr;
 
 	switch (ucode->ucode_id) {
 	case AMDGPU_UCODE_ID_SDMA0:
@@ -975,25 +961,33 @@ static void psp_print_fw_hdr(struct psp_context *psp,
 	case AMDGPU_UCODE_ID_SDMA5:
 	case AMDGPU_UCODE_ID_SDMA6:
 	case AMDGPU_UCODE_ID_SDMA7:
-		amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header);
+		hdr = (struct common_firmware_header *)
+			adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
+		amdgpu_ucode_print_sdma_hdr(hdr);
 		break;
 	case AMDGPU_UCODE_ID_CP_CE:
-		amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+		hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data;
+		amdgpu_ucode_print_gfx_hdr(hdr);
 		break;
 	case AMDGPU_UCODE_ID_CP_PFP:
-		amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+		hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data;
+		amdgpu_ucode_print_gfx_hdr(hdr);
 		break;
 	case AMDGPU_UCODE_ID_CP_ME:
-		amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+		hdr = (struct common_firmware_header *)adev->gfx.me_fw->data;
+		amdgpu_ucode_print_gfx_hdr(hdr);
 		break;
 	case AMDGPU_UCODE_ID_CP_MEC1:
-		amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+		hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data;
+		amdgpu_ucode_print_gfx_hdr(hdr);
 		break;
 	case AMDGPU_UCODE_ID_RLC_G:
-		amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header);
+		hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
+		amdgpu_ucode_print_rlc_hdr(hdr);
 		break;
 	case AMDGPU_UCODE_ID_SMC:
-		amdgpu_ucode_print_smc_hdr(&smc_hdr->header);
+		hdr = (struct common_firmware_header *)adev->pm.fw->data;
+		amdgpu_ucode_print_smc_hdr(hdr);
 		break;
 	default:
 		break;
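The rewrite leans on the layout convention that every versioned firmware header begins with a struct common_firmware_header, so a single pointer computed inside the selected case is enough, and the function no longer dereferences the ->data of firmware objects that may not have been loaded for the current ASIC. A standalone sketch of that first-member pattern, with illustrative stand-in types rather than the amdgpu definitions:

#include <stdint.h>
#include <stdio.h>

struct common_firmware_header {
	uint32_t size_bytes;
	uint32_t ucode_version;
};

struct gfx_firmware_header_v1_0 {
	struct common_firmware_header header;	/* must stay the first member */
	uint32_t ucode_feature_version;
};

static void print_common(const void *fw_data)
{
	const struct common_firmware_header *hdr = fw_data;

	/* reads only the leading common header, whatever the real variant is */
	printf("size=%u version=0x%x\n",
	       (unsigned)hdr->size_bytes, (unsigned)hdr->ucode_version);
}

int main(void)
{
	struct gfx_firmware_header_v1_0 fw = {
		.header = { .size_bytes = 4096, .ucode_version = 0x46 },
		.ucode_feature_version = 27,
	};

	print_common(&fw);
	return 0;
}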
@@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+	adev->gfx.cp_fw_write_wait = false;
+
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+		if ((adev->gfx.me_fw_version >= 0x00000046) &&
+		    (adev->gfx.me_feature_version >= 27) &&
+		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
+		    (adev->gfx.pfp_feature_version >= 27) &&
+		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
+		    (adev->gfx.mec_feature_version >= 27))
+			adev->gfx.cp_fw_write_wait = true;
+		break;
+	default:
+		break;
+	}
+
+	if (adev->gfx.cp_fw_write_wait == false)
+		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+			      GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 {
 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -832,6 +858,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 		}
 	}
 
+	gfx_v10_0_check_fw_write_wait(adev);
 out:
 	if (err) {
 		dev_err(adev->dev,
@@ -4765,6 +4792,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	struct amdgpu_device *adev = ring->adev;
+	bool fw_version_ok = false;
+
+	fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+	if (fw_version_ok)
+		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				       ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 				      uint32_t me, uint32_t pipe,
@@ -5155,6 +5200,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_tmz = gfx_v10_0_ring_emit_tmz,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5188,6 +5234,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5218,6 +5265,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.emit_rreg = gfx_v10_0_ring_emit_rreg,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
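The new emit_reg_write_reg_wait callback is a capability-gated dispatch: cp_fw_write_wait is computed once from the ME/PFP/MEC firmware versions, and each emission then either uses the combined wait_reg_mem form (write reg0, then poll reg1) or falls back to the generic write-then-wait helper. A toy sketch of that dispatch shape in plain C, with hypothetical names rather than the amdgpu ring API:

#include <stdbool.h>
#include <stdio.h>

struct ring {
	bool cp_fw_write_wait;	/* set once at init from the CP firmware versions */
};

static void emit_combined_write_wait(void)
{
	printf("one combined write+wait packet\n");
}

static void emit_write_then_wait(void)
{
	printf("separate write packet, then wait packet\n");
}

static void emit_reg_write_reg_wait(const struct ring *ring)
{
	if (ring->cp_fw_write_wait)
		emit_combined_write_wait();	/* firmware honours the 1-cycle GRBM delay */
	else
		emit_write_then_wait();		/* conservative fallback for older firmware */
}

int main(void)
{
	struct ring old_fw = { .cp_fw_write_wait = false };
	struct ring new_fw = { .cp_fw_write_wait = true };

	emit_reg_write_reg_wait(&old_fw);
	emit_reg_write_reg_wait(&new_fw);
	return 0;
}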
@@ -973,6 +973,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
 	adev->gfx.me_fw_write_wait = false;
 	adev->gfx.mec_fw_write_wait = false;
 
+	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
+	    (adev->gfx.mec_feature_version < 46) ||
+	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
+	    (adev->gfx.pfp_feature_version < 46))
+		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+			      GRBM requires 1-cycle delay in cp firmware\n");
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
@@ -1039,6 +1046,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
 			    !adev->gfx.rlc.is_rlc_v2_1))
 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
+		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+				AMD_PG_SUPPORT_CP |
+				AMD_PG_SUPPORT_RLC_SMU_HS;
+		break;
+	case CHIP_RENOIR:
 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
 				AMD_PG_SUPPORT_CP |
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+					    hub->vm_inv_eng0_ack + eng,
+					    req, 1 << vmid);
 
 	return pd_addr;
 }
@@ -219,6 +219,15 @@ static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
 			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
 
 	tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT;
+	if (adev->gmc.translate_further) {
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+	} else {
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+	}
 	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
 			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
 
@@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	/* wait for a cycle to reset vm_inv_eng*_ack */
+	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 static int sdma_v5_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
 		/* sdma_v5_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 	.pad_ib = sdma_v5_0_ring_pad_ib,
 	.emit_wreg = sdma_v5_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
 	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_0_ring_preempt_ib,
@@ -1186,11 +1186,6 @@ static int soc15_common_early_init(void *handle)
 				 AMD_PG_SUPPORT_VCN |
 				 AMD_PG_SUPPORT_VCN_DPG;
 		adev->external_rev_id = adev->rev_id + 0x91;
-
-		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
-			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
-				AMD_PG_SUPPORT_CP |
-				AMD_PG_SUPPORT_RLC_SMU_HS;
 		break;
 	default:
 		/* FIXME: not supported yet */
@@ -2767,15 +2767,6 @@ void core_link_enable_stream(
 					CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
 					COLOR_DEPTH_UNDEFINED);
 
-		/* This second call is needed to reconfigure the DIG
-		 * as a workaround for the incorrect value being applied
-		 * from transmitter control.
-		 */
-		if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
-			stream->link->link_enc->funcs->setup(
-				stream->link->link_enc,
-				pipe_ctx->stream->signal);
-
 #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
 		if (pipe_ctx->stream->timing.flags.DSC) {
 			if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
@@ -1107,6 +1107,11 @@ struct stream_encoder *dcn20_stream_encoder_create(
 	if (!enc1)
 		return NULL;
 
+	if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) {
+		if (eng_id >= ENGINE_ID_DIGD)
+			eng_id++;
+	}
+
 	dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id,
 					&stream_enc_regs[eng_id],
 					&se_shift, &se_mask);
@@ -205,7 +205,7 @@ static struct smu_11_0_cmn2aisc_mapping navi10_workload_map[PP_SMC_POWER_PROFILE
 	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING,		WORKLOAD_PPLIB_POWER_SAVING_BIT),
 	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO,		WORKLOAD_PPLIB_VIDEO_BIT),
 	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR,			WORKLOAD_PPLIB_VR_BIT),
-	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE,		WORKLOAD_PPLIB_CUSTOM_BIT),
+	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE,		WORKLOAD_PPLIB_COMPUTE_BIT),
 	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,		WORKLOAD_PPLIB_CUSTOM_BIT),
 };
@@ -219,7 +219,7 @@ static struct smu_11_0_cmn2aisc_mapping vega20_workload_map[PP_SMC_POWER_PROFILE
 	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING,		WORKLOAD_PPLIB_POWER_SAVING_BIT),
 	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO,		WORKLOAD_PPLIB_VIDEO_BIT),
 	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR,			WORKLOAD_PPLIB_VR_BIT),
-	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE,		WORKLOAD_PPLIB_CUSTOM_BIT),
+	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE,		WORKLOAD_PPLIB_COMPUTE_BIT),
 	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,		WORKLOAD_PPLIB_CUSTOM_BIT),
 };
@@ -1581,8 +1581,11 @@ static void commit_tail(struct drm_atomic_state *old_state)
 {
 	struct drm_device *dev = old_state->dev;
 	const struct drm_mode_config_helper_funcs *funcs;
+	struct drm_crtc_state *new_crtc_state;
+	struct drm_crtc *crtc;
 	ktime_t start;
 	s64 commit_time_ms;
+	unsigned int i, new_self_refresh_mask = 0;
 
 	funcs = dev->mode_config.helper_private;
 
@@ -1602,6 +1605,15 @@ static void commit_tail(struct drm_atomic_state *old_state)
 
 	drm_atomic_helper_wait_for_dependencies(old_state);
 
+	/*
+	 * We cannot safely access new_crtc_state after
+	 * drm_atomic_helper_commit_hw_done() so figure out which crtc's have
+	 * self-refresh active beforehand:
+	 */
+	for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i)
+		if (new_crtc_state->self_refresh_active)
+			new_self_refresh_mask |= BIT(i);
+
 	if (funcs && funcs->atomic_commit_tail)
 		funcs->atomic_commit_tail(old_state);
 	else
@@ -1610,7 +1622,8 @@ static void commit_tail(struct drm_atomic_state *old_state)
 	commit_time_ms = ktime_ms_delta(ktime_get(), start);
 	if (commit_time_ms > 0)
 		drm_self_refresh_helper_update_avg_times(old_state,
-						 (unsigned long)commit_time_ms);
+						 (unsigned long)commit_time_ms,
+						 new_self_refresh_mask);
 
 	drm_atomic_helper_commit_cleanup_done(old_state);
@@ -133,29 +133,33 @@ out_drop_locks:
  * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages
  * @state: the state which has just been applied to hardware
  * @commit_time_ms: the amount of time in ms that this commit took to complete
+ * @new_self_refresh_mask: bitmask of crtc's that have self_refresh_active in
+ *    new state
  *
  * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will
  * update the average entry/exit self refresh times on self refresh transitions.
  * These averages will be used when calculating how long to delay before
  * entering self refresh mode after activity.
  */
-void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
-					      unsigned int commit_time_ms)
+void
+drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+					 unsigned int commit_time_ms,
+					 unsigned int new_self_refresh_mask)
 {
 	struct drm_crtc *crtc;
-	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+	struct drm_crtc_state *old_crtc_state;
 	int i;
 
-	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
-				      new_crtc_state, i) {
+	for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) {
+		bool new_self_refresh_active = new_self_refresh_mask & BIT(i);
 		struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
 		struct ewma_psr_time *time;
 
 		if (old_crtc_state->self_refresh_active ==
-		    new_crtc_state->self_refresh_active)
+		    new_self_refresh_active)
 			continue;
 
-		if (new_crtc_state->self_refresh_active)
+		if (new_self_refresh_active)
 			time = &sr_data->entry_avg_ms;
 		else
 			time = &sr_data->exit_avg_ms;
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Loading…
	
		Reference in New Issue
	
	Block a user