Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "x86:

   - Support for userspace to emulate Xen hypercalls

   - Raise the maximum number of user memslots

   - Scalability improvements for the new MMU.

     Instead of the complex "fast page fault" logic that is used in
     mmu.c, tdp_mmu.c uses an rwlock so that page faults are concurrent,
     but the code that can run against page faults is limited. Right now
     only page faults take the lock for reading; in the future this will
     be extended to some cases of page table destruction. I hope to
     switch the default MMU around 5.12-rc3 (some testing was delayed
     due to Chinese New Year).

   - Cleanups for MAXPHYADDR checks

   - Use static calls for vendor-specific callbacks

   - On AMD, use VMLOAD/VMSAVE to save and restore host state

   - Stop using deprecated jump label APIs

   - Workaround for AMD erratum that made nested virtualization
     unreliable

   - Support for LBR emulation in the guest

   - Support for communicating bus lock vmexits to userspace

   - Add support for SEV attestation command

   - Miscellaneous cleanups

  PPC:

   - Support for second data watchpoint on POWER10

   - Remove some complex workarounds for buggy early versions of POWER9

   - Guest entry/exit fixes

  ARM64:

   - Make the nVHE EL2 object relocatable

   - Cleanups for concurrent translation faults hitting the same page

   - Support for the standard TRNG hypervisor call

   - A bunch of small PMU/Debug fixes

   - Simplification of the early init hypercall handling

  Non-KVM changes (with acks):

   - Detection of contended rwlocks (implemented only for qrwlocks,
     because KVM only needs it for x86)

   - Allow __DISABLE_EXPORTS from assembly code

   - Provide a saner follow_pfn replacement for modules"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (192 commits)
  KVM: x86/xen: Explicitly pad struct compat_vcpu_info to 64 bytes
  KVM: selftests: Don't bother mapping GVA for Xen shinfo test
  KVM: selftests: Fix hex vs. decimal snafu in Xen test
  KVM: selftests: Fix size of memslots created by Xen tests
  KVM: selftests: Ignore recently added Xen tests' build output
  KVM: selftests: Add missing header file needed by xAPIC IPI tests
  KVM: selftests: Add operand to vmsave/vmload/vmrun in svm.c
  KVM: SVM: Make symbol 'svm_gp_erratum_intercept' static
  locking/arch: Move qrwlock.h include after qspinlock.h
  KVM: PPC: Book3S HV: Fix host radix SLB optimisation with hash guests
  KVM: PPC: Book3S HV: Ensure radix guest has no SLB entries
  KVM: PPC: Don't always report hash MMU capability for P9 < DD2.2
  KVM: PPC: Book3S HV: Save and restore FSCR in the P9 path
  KVM: PPC: remove unneeded semicolon
  KVM: PPC: Book3S HV: Use POWER9 SLBIA IH=6 variant to clear SLB
  KVM: PPC: Book3S HV: No need to clear radix host SLB before loading HPT guest
  KVM: PPC: Book3S HV: Fix radix guest SLB side channel
  KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support
  KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR
  KVM: PPC: Book3S HV: Add infrastructure to support 2nd DAWR
  ...
commit 3e10585335
@@ -263,6 +263,27 @@ Returns: 0 on success, -negative on error

		__u32 trans_len;
	};

10. KVM_SEV_GET_ATTESTATION_REPORT
----------------------------------

The KVM_SEV_GET_ATTESTATION_REPORT command can be used by the hypervisor to
query the attestation report containing the SHA-256 digest of the guest memory
and VMSA passed through the KVM_SEV_LAUNCH commands and signed with the PEK.
The digest returned by the command should match the digest used by the guest
owner with KVM_SEV_LAUNCH_MEASURE.

Parameters (in): struct kvm_sev_attestation_report

Returns: 0 on success, -negative on error

::

	struct kvm_sev_attestation_report {
		__u8 mnonce[16];	/* A random mnonce that will be placed in the report */

		__u64 uaddr;		/* userspace address where the report should be copied */
		__u32 len;
	};
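
For illustration, a VMM might drive this command through the existing
KVM_MEMORY_ENCRYPT_OP vm ioctl roughly as follows (a sketch only: the vm_fd
and sev_fd handles, the nonce source, buffer management and error handling are
assumptions, not part of this patch)::

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Ask KVM/the PSP for an attestation report of the launched guest. */
	static int get_attestation_report(int vm_fd, int sev_fd,
					  const __u8 nonce[16],
					  void *buf, __u32 *len)
	{
		struct kvm_sev_attestation_report report = {};
		struct kvm_sev_cmd cmd = {
			.id     = KVM_SEV_GET_ATTESTATION_REPORT,
			.data   = (unsigned long)&report,
			.sev_fd = sev_fd,		/* fd of /dev/sev */
		};

		/* Nonce the PSP embeds in the signed report; pick it randomly. */
		memcpy(report.mnonce, nonce, sizeof(report.mnonce));
		report.uaddr = (unsigned long)buf;	/* where the report is copied */
		report.len   = *len;			/* size of the buffer */

		if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd) < 0)
			return -1;	/* detailed SEV firmware error is in cmd.error */

		*len = report.len;	/* actual report length */
		return 0;
	}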

References
==========

@@ -960,6 +960,14 @@ memory.

	__u8 pad2[30];
  };

If the KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag is returned from the
KVM_CAP_XEN_HVM check, it may be set in the flags field of this ioctl.
This requests KVM to generate the contents of the hypercall page
automatically; hypercalls will be intercepted and passed to userspace
through KVM_EXIT_XEN. In this case, all of the blob size and address
fields must be zero.

No other flags are currently valid in the struct kvm_xen_hvm_config.

4.29 KVM_GET_CLOCK
------------------
@@ -2268,6 +2276,8 @@ registers, find a list below:

  PPC     KVM_REG_PPC_PSSCR               64
  PPC     KVM_REG_PPC_DEC_EXPIRY          64
  PPC     KVM_REG_PPC_PTCR                64
  PPC     KVM_REG_PPC_DAWR1               64
  PPC     KVM_REG_PPC_DAWRX1              64
  PPC     KVM_REG_PPC_TM_GPR0             64
          ...
  PPC     KVM_REG_PPC_TM_GPR31            64
@ -4831,6 +4841,101 @@ into user space.
|
||||
If a vCPU is in running state while this ioctl is invoked, the vCPU may
|
||||
experience inconsistent filtering behavior on MSR accesses.
|
||||
|
||||
4.127 KVM_XEN_HVM_SET_ATTR
|
||||
--------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
:Architectures: x86
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_xen_hvm_attr
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_xen_hvm_attr {
|
||||
__u16 type;
|
||||
__u16 pad[3];
|
||||
union {
|
||||
__u8 long_mode;
|
||||
__u8 vector;
|
||||
struct {
|
||||
__u64 gfn;
|
||||
} shared_info;
|
||||
__u64 pad[4];
|
||||
} u;
|
||||
};
|
||||
|
||||
type values:
|
||||
|
||||
KVM_XEN_ATTR_TYPE_LONG_MODE
|
||||
Sets the ABI mode of the VM to 32-bit or 64-bit (long mode). This
|
||||
determines the layout of the shared info pages exposed to the VM.
|
||||
|
||||
KVM_XEN_ATTR_TYPE_SHARED_INFO
|
||||
Sets the guest physical frame number at which the Xen "shared info"
|
||||
page resides. Note that although Xen places vcpu_info for the first
|
||||
32 vCPUs in the shared_info page, KVM does not automatically do so
|
||||
and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used
|
||||
explicitly even when the vcpu_info for a given vCPU resides at the
|
||||
"default" location in the shared_info page. This is because KVM is
|
||||
not aware of the Xen CPU id which is used as the index into the
|
||||
vcpu_info[] array, so cannot know the correct default location.
|
||||
|
||||
KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
|
||||
Sets the exception vector used to deliver Xen event channel upcalls.
|
||||
|
||||
4.128 KVM_XEN_HVM_GET_ATTR
|
||||
--------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
:Architectures: x86
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_xen_hvm_attr
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
Allows Xen VM attributes to be read. For the structure and types,
|
||||
see KVM_XEN_HVM_SET_ATTR above.
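
For illustration, pointing KVM at the guest's shared_info page could look like
this (sketch only; the vm_fd handle and the gfn value are assumptions)::

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Tell KVM which guest frame holds the Xen shared_info page. */
	static int xen_set_shared_info(int vm_fd, __u64 gfn)
	{
		struct kvm_xen_hvm_attr attr = {
			.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
			.u.shared_info.gfn = gfn,
		};

		return ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr);
	}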
|
||||
|
||||
4.129 KVM_XEN_VCPU_SET_ATTR
|
||||
---------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
:Architectures: x86
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: struct kvm_xen_vcpu_attr
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_xen_vcpu_attr {
|
||||
__u16 type;
|
||||
__u16 pad[3];
|
||||
union {
|
||||
__u64 gpa;
|
||||
__u64 pad[4];
|
||||
} u;
|
||||
};
|
||||
|
||||
type values:
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
|
||||
Sets the guest physical address of the vcpu_info for a given vCPU.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
|
||||
Sets the guest physical address of an additional pvclock structure
|
||||
for a given vCPU. This is typically used for guest vsyscall support.
|
||||
|
||||
4.130 KVM_XEN_VCPU_GET_ATTR
|
||||
---------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
:Architectures: x86
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: struct kvm_xen_vcpu_attr
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
Allows Xen vCPU attributes to be read. For the structure and types,
|
||||
see KVM_XEN_VCPU_SET_ATTR above.
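
Correspondingly, registering a vcpu_info area might look like this (sketch
only; the vcpu_fd handle and the gpa value are assumptions). As noted under
KVM_XEN_ATTR_TYPE_SHARED_INFO, this is required even when the vcpu_info lives
at its "default" location inside the shared_info page::

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Register the guest physical address of this vCPU's vcpu_info. */
	static int xen_set_vcpu_info(int vcpu_fd, __u64 gpa)
	{
		struct kvm_xen_vcpu_attr attr = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
			.u.gpa = gpa,
		};

		return ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &attr);
	}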
|
||||
|
||||
5. The kvm_run structure
|
||||
========================
|
||||
@ -4893,9 +4998,11 @@ local APIC is not used.
|
||||
__u16 flags;
|
||||
|
||||
More architecture-specific flags detailing state of the VCPU that may
|
||||
affect the device's behavior. The only currently defined flag is
|
||||
KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
|
||||
VCPU is in system management mode.
|
||||
affect the device's behavior. Current defined flags:
|
||||
/* x86, set if the VCPU is in system management mode */
|
||||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
/* x86, set if bus lock detected in VM */
|
||||
#define KVM_RUN_BUS_LOCK (1 << 1)
|
||||
|
||||
::
|
||||
|
||||
@ -4996,13 +5103,18 @@ to the byte array.
|
||||
|
||||
.. note::
|
||||
|
||||
For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR,
|
||||
For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
|
||||
KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
|
||||
operations are complete (and guest state is consistent) only after userspace
|
||||
has re-entered the kernel with KVM_RUN. The kernel side will first finish
|
||||
incomplete operations and then check for pending signals. Userspace
|
||||
can re-enter the guest with an unmasked signal pending to complete
|
||||
pending operations.
|
||||
incomplete operations and then check for pending signals.
|
||||
|
||||
The pending state of the operation is not preserved in state which is
|
||||
visible to userspace, thus userspace should ensure that the operation is
|
||||
completed before performing a live migration. Userspace can re-enter the
|
||||
guest with an unmasked signal pending or with the immediate_exit field set
|
||||
to complete pending operations without allowing any further instructions
|
||||
to be executed.
|
||||
|
||||
::
|
||||
|
||||
@ -5327,6 +5439,34 @@ wants to write. Once finished processing the event, user space must continue
|
||||
vCPU execution. If the MSR write was unsuccessful, user space also sets the
|
||||
"error" field to "1".
|
||||
|
||||
::
|
||||
|
||||
|
||||
struct kvm_xen_exit {
|
||||
#define KVM_EXIT_XEN_HCALL 1
|
||||
__u32 type;
|
||||
union {
|
||||
struct {
|
||||
__u32 longmode;
|
||||
__u32 cpl;
|
||||
__u64 input;
|
||||
__u64 result;
|
||||
__u64 params[6];
|
||||
} hcall;
|
||||
} u;
|
||||
};
|
||||
/* KVM_EXIT_XEN */
|
||||
struct kvm_xen_exit xen;
|
||||
|
||||
Indicates that the VCPU exits into userspace to process some tasks
|
||||
related to Xen emulation.
|
||||
|
||||
Valid values for 'type' are:
|
||||
|
||||
- KVM_EXIT_XEN_HCALL -- synchronously notify user-space about Xen hypercall.
|
||||
Userspace is expected to place the hypercall result into the appropriate
|
||||
field before invoking KVM_RUN again.
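
A sketch of the userspace side of such an exit (the handle_xen_hypercall()
dispatcher is hypothetical, and error handling is omitted)::

	#include <linux/kvm.h>

	/* Hypothetical VMM hypercall dispatcher. */
	extern __u64 handle_xen_hypercall(__u64 input, const __u64 *params);

	/* Called from the vCPU loop after KVM_RUN returns. */
	static void handle_xen_exit(struct kvm_run *run)
	{
		if (run->exit_reason == KVM_EXIT_XEN &&
		    run->xen.type == KVM_EXIT_XEN_HCALL) {
			/* Emulate the hypercall, then publish the result
			 * before re-entering the guest with KVM_RUN. */
			run->xen.u.hcall.result =
				handle_xen_hypercall(run->xen.u.hcall.input,
						     run->xen.u.hcall.params);
		}
	}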
|
||||
|
||||
::
|
||||
|
||||
/* Fix the size of the union. */
|
||||
@ -6038,6 +6178,53 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
|
||||
can then handle to implement model specific MSR handling and/or user notifications
|
||||
to inform a user that an MSR was not handled.
|
||||
|
||||
7.22 KVM_CAP_X86_BUS_LOCK_EXIT
------------------------------

:Architectures: x86
:Target: VM
:Parameters: args[0] defines the policy used when bus locks are detected in the guest
:Returns: 0 on success, -EINVAL when args[0] contains invalid bits

Valid bits in args[0] are::

  #define KVM_BUS_LOCK_DETECTION_OFF      (1 << 0)
  #define KVM_BUS_LOCK_DETECTION_EXIT     (1 << 1)

Enabling this capability on a VM provides userspace with a way to select a
policy for handling bus locks detected in the guest. Userspace can obtain the
supported modes from the result of KVM_CHECK_EXTENSION and enable the chosen
mode through KVM_ENABLE_CAP.

KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are currently
supported and are mutually exclusive. More bits can be added in the future.

With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in the guest do not cause VM
exits, so no additional action is needed. This is the default mode.

With KVM_BUS_LOCK_DETECTION_EXIT set, a VM exit occurs whenever a bus lock is
detected in the guest; KVM simply exits to userspace when handling it, so
userspace can enforce its own throttling or other policy-based mitigations.

This capability addresses the threat that a VM can exploit bus locks to
degrade the performance of the whole system. Once userspace enables this
capability and selects the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM sets the
KVM_RUN_BUS_LOCK flag in the vcpu's kvm_run->flags field and exits to
userspace. Because a bus lock VM exit can be preempted by a higher-priority VM
exit, the exit notification delivered to userspace can be KVM_EXIT_BUS_LOCK or
another reason; the KVM_RUN_BUS_LOCK flag is used to distinguish between them.
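
A minimal sketch of how a VMM might opt in to this policy and recognize the
resulting exits (constant names follow the documentation above; the vm_fd
handle, the run pointer and the throttling policy are assumptions)::

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Enable bus-lock exits on the VM if the mode is advertised. */
	static void enable_bus_lock_exits(int vm_fd)
	{
		int modes = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_BUS_LOCK_EXIT);

		if (modes > 0 && (modes & KVM_BUS_LOCK_DETECTION_EXIT)) {
			struct kvm_enable_cap cap = {
				.cap = KVM_CAP_X86_BUS_LOCK_EXIT,
				.args[0] = KVM_BUS_LOCK_DETECTION_EXIT,
			};

			ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
		}
	}

	/* In the vCPU run loop: the flag marks bus-lock exits even when a
	 * higher-priority exit reason preempted the bus lock one. */
	static void maybe_throttle(struct kvm_run *run)
	{
		if (run->flags & KVM_RUN_BUS_LOCK) {
			/* e.g. sleep this vCPU briefly before the next KVM_RUN */
		}
	}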

7.23 KVM_CAP_PPC_DAWR1
----------------------

:Architectures: ppc
:Parameters: none
:Returns: 0 on success, -EINVAL when the CPU doesn't support a second DAWR

This capability can be used to check for / enable the second DAWR feature
provided by the POWER10 processor.

8. Other capabilities.
|
||||
======================
|
||||
|
||||
@ -6415,7 +6602,6 @@ guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
|
||||
(0x40000001). Otherwise, a guest may use the paravirtual features
|
||||
regardless of what has actually been exposed through the CPUID leaf.
|
||||
|
||||
|
||||
8.29 KVM_CAP_DIRTY_LOG_RING
|
||||
---------------------------
|
||||
|
||||
@ -6502,3 +6688,29 @@ KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG. After enabling
|
||||
KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
|
||||
machine will switch to ring-buffer dirty page tracking and further
|
||||
KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
|
||||
|
||||
8.30 KVM_CAP_XEN_HVM
|
||||
--------------------
|
||||
|
||||
:Architectures: x86
|
||||
|
||||
This capability indicates the features that Xen supports for hosting Xen
|
||||
PVHVM guests. Valid flags are::
|
||||
|
||||
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
|
||||
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
|
||||
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
|
||||
|
||||
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
|
||||
ioctl is available, for the guest to set its hypercall page.
|
||||
|
||||
If KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL is also set, the same flag may also be
|
||||
provided in the flags to KVM_XEN_HVM_CONFIG, without providing hypercall page
|
||||
contents, to request that KVM generate hypercall page content automatically
|
||||
and also enable interception of guest hypercalls with KVM_EXIT_XEN.
|
||||
|
||||
The KVM_XEN_HVM_CONFIG_SHARED_INFO flag indicates the availability of the
|
||||
KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
|
||||
KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
|
||||
for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
|
||||
vcpu_info is set.
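
One possible way for a VMM to probe these flags and turn on hypercall
interception (sketch only; the vm_fd handle and error handling are
assumptions)::

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Enable Xen hypercall interception if KVM advertises it. */
	static int xen_enable_hcall_intercept(int vm_fd)
	{
		int feat = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XEN_HVM);
		struct kvm_xen_hvm_config cfg;

		if (feat < 0 || !(feat & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL))
			return -1;	/* not supported by this kernel */

		memset(&cfg, 0, sizeof(cfg));
		/* Blob size/address stay zero: KVM fills in the hypercall page
		 * and forwards guest hypercalls to userspace via KVM_EXIT_XEN. */
		cfg.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL;

		return ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg);
	}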
|
||||
|
@ -16,7 +16,14 @@ The acquisition orders for mutexes are as follows:
|
||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||
them together is quite rare.
|
||||
|
||||
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
|
||||
On x86:
|
||||
|
||||
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
|
||||
|
||||
- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is
|
||||
taken inside kvm->arch.mmu_lock, and cannot be taken without already
|
||||
holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
|
||||
there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
|
||||
|
||||
Everything else is a leaf: no other lock is taken inside the critical
|
||||
sections.
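
An illustrative sketch of that ordering rule (not taken from the patch; the
surrounding function is hypothetical)::

	/* tdp_mmu_pages_lock nests inside mmu_lock, which the page-fault
	 * path takes for read. */
	static void tdp_mmu_touch_page_lists(struct kvm *kvm)
	{
		read_lock(&kvm->arch.mmu_lock);
		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
		/* ... add or remove pages from the TDP MMU lists ... */
		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
		read_unlock(&kvm->arch.mmu_lock);
	}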
|
||||
|
@ -7,6 +7,9 @@
|
||||
#ifndef __ARM64_HYP_IMAGE_H__
|
||||
#define __ARM64_HYP_IMAGE_H__
|
||||
|
||||
#define __HYP_CONCAT(a, b) a ## b
|
||||
#define HYP_CONCAT(a, b) __HYP_CONCAT(a, b)
|
||||
|
||||
/*
|
||||
* KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
|
||||
* to separate it from the kernel proper.
|
||||
@ -21,9 +24,31 @@
|
||||
*/
|
||||
#define HYP_SECTION_NAME(NAME) .hyp##NAME
|
||||
|
||||
/* Symbol defined at the beginning of each hyp section. */
|
||||
#define HYP_SECTION_SYMBOL_NAME(NAME) \
|
||||
HYP_CONCAT(__hyp_section_, HYP_SECTION_NAME(NAME))
|
||||
|
||||
/*
|
||||
* Helper to generate linker script statements starting a hyp section.
|
||||
*
|
||||
* A symbol with a well-known name is defined at the first byte. This
|
||||
* is used as a base for hyp relocations (see gen-hyprel.c). It must
|
||||
* be defined inside the section so the linker of `vmlinux` cannot
|
||||
* separate it from the section data.
|
||||
*/
|
||||
#define BEGIN_HYP_SECTION(NAME) \
|
||||
HYP_SECTION_NAME(NAME) : { \
|
||||
HYP_SECTION_SYMBOL_NAME(NAME) = .;
|
||||
|
||||
/* Helper to generate linker script statements ending a hyp section. */
|
||||
#define END_HYP_SECTION \
|
||||
}
|
||||
|
||||
/* Defines an ELF hyp section from input section @NAME and its subsections. */
|
||||
#define HYP_SECTION(NAME) \
|
||||
HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) }
|
||||
#define HYP_SECTION(NAME) \
|
||||
BEGIN_HYP_SECTION(NAME) \
|
||||
*(NAME NAME##.*) \
|
||||
END_HYP_SECTION
|
||||
|
||||
/*
|
||||
* Defines a linker script alias of a kernel-proper symbol referenced by
|
||||
|
@ -199,26 +199,6 @@ extern void __vgic_v3_init_lrs(void);
|
||||
|
||||
extern u32 __kvm_get_mdcr_el2(void);
|
||||
|
||||
/*
|
||||
* Obtain the PC-relative address of a kernel symbol
|
||||
* s: symbol
|
||||
*
|
||||
* The goal of this macro is to return a symbol's address based on a
|
||||
* PC-relative computation, as opposed to a loading the VA from a
|
||||
* constant pool or something similar. This works well for HYP, as an
|
||||
* absolute VA is guaranteed to be wrong. Only use this if trying to
|
||||
* obtain the address of a symbol (i.e. not something you obtained by
|
||||
* following a pointer).
|
||||
*/
|
||||
#define hyp_symbol_addr(s) \
|
||||
({ \
|
||||
typeof(s) *addr; \
|
||||
asm("adrp %0, %1\n" \
|
||||
"add %0, %0, :lo12:%1\n" \
|
||||
: "=r" (addr) : "S" (&s)); \
|
||||
addr; \
|
||||
})
|
||||
|
||||
#define __KVM_EXTABLE(from, to) \
|
||||
" .pushsection __kvm_ex_table, \"a\"\n" \
|
||||
" .align 3\n" \
|
||||
|
@ -30,7 +30,6 @@
|
||||
|
||||
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
|
||||
|
||||
#define KVM_USER_MEM_SLOTS 512
|
||||
#define KVM_HALT_POLL_NS_DEFAULT 500000
|
||||
|
||||
#include <kvm/arm_vgic.h>
|
||||
@ -771,4 +770,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
||||
#define kvm_vcpu_has_pmu(vcpu) \
|
||||
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
|
||||
|
||||
int kvm_trng_call(struct kvm_vcpu *vcpu);
|
||||
|
||||
#endif /* __ARM64_KVM_HOST_H__ */
|
||||
|
@ -73,8 +73,18 @@ alternative_cb_end
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Convert a kernel image address to a PA
|
||||
* reg: kernel address to be converted in place
|
||||
* Convert a hypervisor VA to a PA
|
||||
* reg: hypervisor address to be converted in place
|
||||
* tmp: temporary register
|
||||
*/
|
||||
.macro hyp_pa reg, tmp
|
||||
ldr_l \tmp, hyp_physvirt_offset
|
||||
add \reg, \reg, \tmp
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Convert a hypervisor VA to a kernel image address
|
||||
* reg: hypervisor address to be converted in place
|
||||
* tmp: temporary register
|
||||
*
|
||||
* The actual code generation takes place in kvm_get_kimage_voffset, and
|
||||
@ -82,7 +92,11 @@ alternative_cb_end
|
||||
* perform the register allocation (kvm_get_kimage_voffset uses the
|
||||
* specific registers encoded in the instructions).
|
||||
*/
|
||||
.macro kimg_pa reg, tmp
|
||||
.macro hyp_kimg_va reg, tmp
|
||||
/* Convert hyp VA -> PA. */
|
||||
hyp_pa \reg, \tmp
|
||||
|
||||
/* Load kimage_voffset. */
|
||||
alternative_cb kvm_get_kimage_voffset
|
||||
movz \tmp, #0
|
||||
movk \tmp, #0, lsl #16
|
||||
@ -90,32 +104,8 @@ alternative_cb kvm_get_kimage_voffset
|
||||
movk \tmp, #0, lsl #48
|
||||
alternative_cb_end
|
||||
|
||||
/* reg = __pa(reg) */
|
||||
sub \reg, \reg, \tmp
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Convert a kernel image address to a hyp VA
|
||||
* reg: kernel address to be converted in place
|
||||
* tmp: temporary register
|
||||
*
|
||||
* The actual code generation takes place in kvm_get_kimage_voffset, and
|
||||
* the instructions below are only there to reserve the space and
|
||||
* perform the register allocation (kvm_update_kimg_phys_offset uses the
|
||||
* specific registers encoded in the instructions).
|
||||
*/
|
||||
.macro kimg_hyp_va reg, tmp
|
||||
alternative_cb kvm_update_kimg_phys_offset
|
||||
movz \tmp, #0
|
||||
movk \tmp, #0, lsl #16
|
||||
movk \tmp, #0, lsl #32
|
||||
movk \tmp, #0, lsl #48
|
||||
alternative_cb_end
|
||||
|
||||
sub \reg, \reg, \tmp
|
||||
mov_q \tmp, PAGE_OFFSET
|
||||
orr \reg, \reg, \tmp
|
||||
kern_hyp_va \reg
|
||||
/* Convert PA -> kimg VA. */
|
||||
add \reg, \reg, \tmp
|
||||
.endm
|
||||
|
||||
#else
|
||||
@ -129,6 +119,7 @@ alternative_cb_end
|
||||
void kvm_update_va_mask(struct alt_instr *alt,
|
||||
__le32 *origptr, __le32 *updptr, int nr_inst);
|
||||
void kvm_compute_layout(void);
|
||||
void kvm_apply_hyp_relocations(void);
|
||||
|
||||
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
|
||||
{
|
||||
@ -144,24 +135,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
|
||||
|
||||
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
|
||||
|
||||
static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
|
||||
{
|
||||
unsigned long offset;
|
||||
|
||||
asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
|
||||
"movk %0, #0, lsl #16\n"
|
||||
"movk %0, #0, lsl #32\n"
|
||||
"movk %0, #0, lsl #48\n",
|
||||
kvm_update_kimg_phys_offset)
|
||||
: "=r" (offset));
|
||||
|
||||
return __kern_hyp_va((v - offset) | PAGE_OFFSET);
|
||||
}
|
||||
|
||||
#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
|
||||
|
||||
#define kimg_fn_ptr(x) (typeof(x) **)(x)
|
||||
|
||||
/*
|
||||
* We currently support using a VM-specified IPA size. For backward
|
||||
* compatibility, the default IPA size is fixed to 40bits.
|
||||
|
@ -157,6 +157,11 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
* If device attributes are not explicitly requested in @prot, then the
|
||||
* mapping will be normal, cacheable.
|
||||
*
|
||||
* Note that the update of a valid leaf PTE in this function will be aborted,
|
||||
* if it's trying to recreate the exact same mapping or only change the access
|
||||
* permissions. Instead, the vCPU will exit one more time from guest if still
|
||||
* needed and then go through the path of relaxing permissions.
|
||||
*
|
||||
* Note that this function will both coalesce existing table entries and split
|
||||
* existing block mappings, relying on page-faults to fault back areas outside
|
||||
* of the new mapping lazily.
|
||||
|
@ -11,7 +11,8 @@ extern char __alt_instructions[], __alt_instructions_end[];
|
||||
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
|
||||
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
|
||||
extern char __hyp_text_start[], __hyp_text_end[];
|
||||
extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
|
||||
extern char __hyp_rodata_start[], __hyp_rodata_end[];
|
||||
extern char __hyp_reloc_begin[], __hyp_reloc_end[];
|
||||
extern char __idmap_text_start[], __idmap_text_end[];
|
||||
extern char __initdata_begin[], __initdata_end[];
|
||||
extern char __inittext_begin[], __inittext_end[];
|
||||
|
@ -5,8 +5,8 @@
|
||||
#ifndef __ASM_SPINLOCK_H
|
||||
#define __ASM_SPINLOCK_H
|
||||
|
||||
#include <asm/qrwlock.h>
|
||||
#include <asm/qspinlock.h>
|
||||
#include <asm/qrwlock.h>
|
||||
|
||||
/* See include/linux/spinlock.h */
|
||||
#define smp_mb__after_spinlock() smp_mb()
|
||||
|
@ -853,7 +853,10 @@
|
||||
|
||||
#define ID_DFR0_PERFMON_SHIFT 24
|
||||
|
||||
#define ID_DFR0_PERFMON_8_0 0x3
|
||||
#define ID_DFR0_PERFMON_8_1 0x4
|
||||
#define ID_DFR0_PERFMON_8_4 0x5
|
||||
#define ID_DFR0_PERFMON_8_5 0x6
|
||||
|
||||
#define ID_ISAR4_SWP_FRAC_SHIFT 28
|
||||
#define ID_ISAR4_PSR_M_SHIFT 24
|
||||
|
@ -64,7 +64,6 @@ __efistub__ctype = _ctype;
|
||||
/* Alternative callbacks for init-time patching of nVHE hyp code. */
|
||||
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
|
||||
KVM_NVHE_ALIAS(kvm_update_va_mask);
|
||||
KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
|
||||
KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
|
||||
|
||||
/* Global kernel state accessed by nVHE hyp code. */
|
||||
|
@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
|
||||
"CPU: CPUs started in inconsistent modes");
|
||||
else
|
||||
pr_info("CPU: All CPU(s) started at EL1\n");
|
||||
if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode())
|
||||
if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) {
|
||||
kvm_compute_layout();
|
||||
kvm_apply_hyp_relocations();
|
||||
}
|
||||
}
|
||||
|
||||
void __init smp_cpus_done(unsigned int max_cpus)
|
||||
|
@ -31,10 +31,11 @@ jiffies = jiffies_64;
|
||||
__stop___kvm_ex_table = .;
|
||||
|
||||
#define HYPERVISOR_DATA_SECTIONS \
|
||||
HYP_SECTION_NAME(.data..ro_after_init) : { \
|
||||
__hyp_data_ro_after_init_start = .; \
|
||||
HYP_SECTION_NAME(.rodata) : { \
|
||||
__hyp_rodata_start = .; \
|
||||
*(HYP_SECTION_NAME(.data..ro_after_init)) \
|
||||
__hyp_data_ro_after_init_end = .; \
|
||||
*(HYP_SECTION_NAME(.rodata)) \
|
||||
__hyp_rodata_end = .; \
|
||||
}
|
||||
|
||||
#define HYPERVISOR_PERCPU_SECTION \
|
||||
@ -42,10 +43,19 @@ jiffies = jiffies_64;
|
||||
HYP_SECTION_NAME(.data..percpu) : { \
|
||||
*(HYP_SECTION_NAME(.data..percpu)) \
|
||||
}
|
||||
|
||||
#define HYPERVISOR_RELOC_SECTION \
|
||||
.hyp.reloc : ALIGN(4) { \
|
||||
__hyp_reloc_begin = .; \
|
||||
*(.hyp.reloc) \
|
||||
__hyp_reloc_end = .; \
|
||||
}
|
||||
|
||||
#else /* CONFIG_KVM */
|
||||
#define HYPERVISOR_EXTABLE
|
||||
#define HYPERVISOR_DATA_SECTIONS
|
||||
#define HYPERVISOR_PERCPU_SECTION
|
||||
#define HYPERVISOR_RELOC_SECTION
|
||||
#endif
|
||||
|
||||
#define HYPERVISOR_TEXT \
|
||||
@ -216,6 +226,8 @@ SECTIONS
|
||||
PERCPU_SECTION(L1_CACHE_BYTES)
|
||||
HYPERVISOR_PERCPU_SECTION
|
||||
|
||||
HYPERVISOR_RELOC_SECTION
|
||||
|
||||
.rela.dyn : ALIGN(8) {
|
||||
*(.rela .rela*)
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
|
||||
inject_fault.o va_layout.o handle_exit.o \
|
||||
guest.o debug.o reset.o sys_regs.o \
|
||||
vgic-sys-reg-v3.o fpsimd.o pmu.o \
|
||||
arch_timer.o \
|
||||
arch_timer.o trng.o\
|
||||
vgic/vgic.o vgic/vgic-init.o \
|
||||
vgic/vgic-irqfd.o vgic/vgic-v2.o \
|
||||
vgic/vgic-v3.o vgic/vgic-v4.o \
|
||||
|
@ -1750,11 +1750,10 @@ static int init_hyp_mode(void)
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
|
||||
kvm_ksym_ref(__hyp_data_ro_after_init_end),
|
||||
PAGE_HYP_RO);
|
||||
err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
|
||||
kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
|
||||
if (err) {
|
||||
kvm_err("Cannot map .hyp.data..ro_after_init section\n");
|
||||
kvm_err("Cannot map .hyp.rodata section\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
|
@ -505,8 +505,8 @@ static inline void __kvm_unexpected_el2_exception(void)
|
||||
struct exception_table_entry *entry, *end;
|
||||
unsigned long elr_el2 = read_sysreg(elr_el2);
|
||||
|
||||
entry = hyp_symbol_addr(__start___kvm_ex_table);
|
||||
end = hyp_symbol_addr(__stop___kvm_ex_table);
|
||||
entry = &__start___kvm_ex_table;
|
||||
end = &__stop___kvm_ex_table;
|
||||
|
||||
while (entry < end) {
|
||||
addr = (unsigned long)&entry->insn + entry->insn;
|
||||
|
arch/arm64/kvm/hyp/nvhe/.gitignore (vendored, 2 lines changed)
@ -1,2 +1,4 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
gen-hyprel
|
||||
hyp.lds
|
||||
hyp-reloc.S
|
||||
|
@ -3,8 +3,11 @@
|
||||
# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
|
||||
#
|
||||
|
||||
asflags-y := -D__KVM_NVHE_HYPERVISOR__
|
||||
ccflags-y := -D__KVM_NVHE_HYPERVISOR__
|
||||
asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
|
||||
ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
|
||||
|
||||
hostprogs := gen-hyprel
|
||||
HOST_EXTRACFLAGS += -I$(objtree)/include
|
||||
|
||||
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
|
||||
hyp-main.o hyp-smp.o psci-relay.o
|
||||
@ -19,7 +22,7 @@ obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
|
||||
hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
|
||||
obj-y := kvm_nvhe.o
|
||||
extra-y := $(hyp-obj) kvm_nvhe.tmp.o hyp.lds
|
||||
extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
|
||||
|
||||
# 1) Compile all source files to `.nvhe.o` object files. The file extension
|
||||
# avoids file name clashes for files shared with VHE.
|
||||
@ -42,11 +45,31 @@ LDFLAGS_kvm_nvhe.tmp.o := -r -T
|
||||
$(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
|
||||
$(call if_changed,ld)
|
||||
|
||||
# 4) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
|
||||
# 4) Generate list of hyp code/data positions that need to be relocated at
|
||||
# runtime. Because the hypervisor is part of the kernel binary, relocations
|
||||
# produce a kernel VA. We enumerate relocations targeting hyp at build time
|
||||
# and convert the kernel VAs at those positions to hyp VAs.
|
||||
$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel
|
||||
$(call if_changed,hyprel)
|
||||
|
||||
# 5) Compile hyp-reloc.S and link it into the existing partially linked object.
|
||||
# The object file now contains a section with pointers to hyp positions that
|
||||
# will contain kernel VAs at runtime. These pointers have relocations on them
|
||||
# so that they get updated as the hyp object is linked into `vmlinux`.
|
||||
LDFLAGS_kvm_nvhe.rel.o := -r
|
||||
$(obj)/kvm_nvhe.rel.o: $(obj)/kvm_nvhe.tmp.o $(obj)/hyp-reloc.o FORCE
|
||||
$(call if_changed,ld)
|
||||
|
||||
# 6) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
|
||||
# Prefixes names of ELF symbols with '__kvm_nvhe_'.
|
||||
$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.tmp.o FORCE
|
||||
$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.rel.o FORCE
|
||||
$(call if_changed,hypcopy)
|
||||
|
||||
# The HYPREL command calls `gen-hyprel` to generate an assembly file with
|
||||
# a list of relocations targeting hyp code/data.
|
||||
quiet_cmd_hyprel = HYPREL $@
|
||||
cmd_hyprel = $(obj)/gen-hyprel $< > $@
|
||||
|
||||
# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
|
||||
# to avoid clashes with VHE code/data.
|
||||
quiet_cmd_hypcopy = HYPCOPY $@
|
||||
|
arch/arm64/kvm/hyp/nvhe/gen-hyprel.c (new file, 438 lines)
@ -0,0 +1,438 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2020 - Google LLC
|
||||
* Author: David Brazdil <dbrazdil@google.com>
|
||||
*
|
||||
* Generates relocation information used by the kernel to convert
|
||||
* absolute addresses in hyp data from kernel VAs to hyp VAs.
|
||||
*
|
||||
* This is necessary because hyp code is linked into the same binary
|
||||
* as the kernel but executes under different memory mappings.
|
||||
* If the compiler used absolute addressing, those addresses need to
|
||||
* be converted before they are used by hyp code.
|
||||
*
|
||||
* The input of this program is the relocatable ELF object containing
|
||||
* all hyp code/data, not yet linked into vmlinux. Hyp section names
|
||||
* should have been prefixed with `.hyp` at this point.
|
||||
*
|
||||
* The output (printed to stdout) is an assembly file containing
|
||||
* an array of 32-bit integers and static relocations that instruct
|
||||
* the linker of `vmlinux` to populate the array entries with offsets
|
||||
* to positions in the kernel binary containing VAs used by hyp code.
|
||||
*
|
||||
* Note that dynamic relocations could be used for the same purpose.
|
||||
* However, those are only generated if CONFIG_RELOCATABLE=y.
|
||||
*/
|
||||
|
||||
#include <elf.h>
|
||||
#include <endian.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <generated/autoconf.h>
|
||||
|
||||
#define HYP_SECTION_PREFIX ".hyp"
|
||||
#define HYP_RELOC_SECTION ".hyp.reloc"
|
||||
#define HYP_SECTION_SYMBOL_PREFIX "__hyp_section_"
|
||||
|
||||
/*
|
||||
* AArch64 relocation type constants.
|
||||
* Included in case these are not defined in the host toolchain.
|
||||
*/
|
||||
#ifndef R_AARCH64_ABS64
|
||||
#define R_AARCH64_ABS64 257
|
||||
#endif
|
||||
#ifndef R_AARCH64_LD_PREL_LO19
|
||||
#define R_AARCH64_LD_PREL_LO19 273
|
||||
#endif
|
||||
#ifndef R_AARCH64_ADR_PREL_LO21
|
||||
#define R_AARCH64_ADR_PREL_LO21 274
|
||||
#endif
|
||||
#ifndef R_AARCH64_ADR_PREL_PG_HI21
|
||||
#define R_AARCH64_ADR_PREL_PG_HI21 275
|
||||
#endif
|
||||
#ifndef R_AARCH64_ADR_PREL_PG_HI21_NC
|
||||
#define R_AARCH64_ADR_PREL_PG_HI21_NC 276
|
||||
#endif
|
||||
#ifndef R_AARCH64_ADD_ABS_LO12_NC
|
||||
#define R_AARCH64_ADD_ABS_LO12_NC 277
|
||||
#endif
|
||||
#ifndef R_AARCH64_LDST8_ABS_LO12_NC
|
||||
#define R_AARCH64_LDST8_ABS_LO12_NC 278
|
||||
#endif
|
||||
#ifndef R_AARCH64_TSTBR14
|
||||
#define R_AARCH64_TSTBR14 279
|
||||
#endif
|
||||
#ifndef R_AARCH64_CONDBR19
|
||||
#define R_AARCH64_CONDBR19 280
|
||||
#endif
|
||||
#ifndef R_AARCH64_JUMP26
|
||||
#define R_AARCH64_JUMP26 282
|
||||
#endif
|
||||
#ifndef R_AARCH64_CALL26
|
||||
#define R_AARCH64_CALL26 283
|
||||
#endif
|
||||
#ifndef R_AARCH64_LDST16_ABS_LO12_NC
|
||||
#define R_AARCH64_LDST16_ABS_LO12_NC 284
|
||||
#endif
|
||||
#ifndef R_AARCH64_LDST32_ABS_LO12_NC
|
||||
#define R_AARCH64_LDST32_ABS_LO12_NC 285
|
||||
#endif
|
||||
#ifndef R_AARCH64_LDST64_ABS_LO12_NC
|
||||
#define R_AARCH64_LDST64_ABS_LO12_NC 286
|
||||
#endif
|
||||
#ifndef R_AARCH64_MOVW_PREL_G0
|
||||
#define R_AARCH64_MOVW_PREL_G0 287
|
||||
#endif
|
||||
#ifndef R_AARCH64_MOVW_PREL_G0_NC
|
||||
#define R_AARCH64_MOVW_PREL_G0_NC 288
|
||||
#endif
|
||||
#ifndef R_AARCH64_MOVW_PREL_G1
|
||||
#define R_AARCH64_MOVW_PREL_G1 289
|
||||
#endif
|
||||
#ifndef R_AARCH64_MOVW_PREL_G1_NC
|
||||
#define R_AARCH64_MOVW_PREL_G1_NC 290
|
||||
#endif
|
||||
#ifndef R_AARCH64_MOVW_PREL_G2
|
||||
#define R_AARCH64_MOVW_PREL_G2 291
|
||||
#endif
|
||||
#ifndef R_AARCH64_MOVW_PREL_G2_NC
|
||||
#define R_AARCH64_MOVW_PREL_G2_NC 292
|
||||
#endif
|
||||
#ifndef R_AARCH64_MOVW_PREL_G3
|
||||
#define R_AARCH64_MOVW_PREL_G3 293
|
||||
#endif
|
||||
#ifndef R_AARCH64_LDST128_ABS_LO12_NC
|
||||
#define R_AARCH64_LDST128_ABS_LO12_NC 299
|
||||
#endif
|
||||
|
||||
/* Global state of the processed ELF. */
|
||||
static struct {
|
||||
const char *path;
|
||||
char *begin;
|
||||
size_t size;
|
||||
Elf64_Ehdr *ehdr;
|
||||
Elf64_Shdr *sh_table;
|
||||
const char *sh_string;
|
||||
} elf;
|
||||
|
||||
#if defined(CONFIG_CPU_LITTLE_ENDIAN)
|
||||
|
||||
#define elf16toh(x) le16toh(x)
|
||||
#define elf32toh(x) le32toh(x)
|
||||
#define elf64toh(x) le64toh(x)
|
||||
|
||||
#define ELFENDIAN ELFDATA2LSB
|
||||
|
||||
#elif defined(CONFIG_CPU_BIG_ENDIAN)
|
||||
|
||||
#define elf16toh(x) be16toh(x)
|
||||
#define elf32toh(x) be32toh(x)
|
||||
#define elf64toh(x) be64toh(x)
|
||||
|
||||
#define ELFENDIAN ELFDATA2MSB
|
||||
|
||||
#else
|
||||
|
||||
#error PDP-endian sadly unsupported...
|
||||
|
||||
#endif
|
||||
|
||||
#define fatal_error(fmt, ...) \
|
||||
({ \
|
||||
fprintf(stderr, "error: %s: " fmt "\n", \
|
||||
elf.path, ## __VA_ARGS__); \
|
||||
exit(EXIT_FAILURE); \
|
||||
__builtin_unreachable(); \
|
||||
})
|
||||
|
||||
#define fatal_perror(msg) \
|
||||
({ \
|
||||
fprintf(stderr, "error: %s: " msg ": %s\n", \
|
||||
elf.path, strerror(errno)); \
|
||||
exit(EXIT_FAILURE); \
|
||||
__builtin_unreachable(); \
|
||||
})
|
||||
|
||||
#define assert_op(lhs, rhs, fmt, op) \
|
||||
({ \
|
||||
typeof(lhs) _lhs = (lhs); \
|
||||
typeof(rhs) _rhs = (rhs); \
|
||||
\
|
||||
if (!(_lhs op _rhs)) { \
|
||||
fatal_error("assertion " #lhs " " #op " " #rhs \
|
||||
" failed (lhs=" fmt ", rhs=" fmt \
|
||||
", line=%d)", _lhs, _rhs, __LINE__); \
|
||||
} \
|
||||
})
|
||||
|
||||
#define assert_eq(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, ==)
|
||||
#define assert_ne(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, !=)
|
||||
#define assert_lt(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, <)
|
||||
#define assert_ge(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, >=)
|
||||
|
||||
/*
|
||||
* Return a pointer of a given type at a given offset from
|
||||
* the beginning of the ELF file.
|
||||
*/
|
||||
#define elf_ptr(type, off) ((type *)(elf.begin + (off)))
|
||||
|
||||
/* Iterate over all sections in the ELF. */
|
||||
#define for_each_section(var) \
|
||||
for (var = elf.sh_table; var < elf.sh_table + elf16toh(elf.ehdr->e_shnum); ++var)
|
||||
|
||||
/* Iterate over all Elf64_Rela relocations in a given section. */
|
||||
#define for_each_rela(shdr, var) \
|
||||
for (var = elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset)); \
|
||||
var < elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset) + elf64toh(shdr->sh_size)); var++)
|
||||
|
||||
/* True if a string starts with a given prefix. */
|
||||
static inline bool starts_with(const char *str, const char *prefix)
|
||||
{
|
||||
return memcmp(str, prefix, strlen(prefix)) == 0;
|
||||
}
|
||||
|
||||
/* Returns a string containing the name of a given section. */
|
||||
static inline const char *section_name(Elf64_Shdr *shdr)
|
||||
{
|
||||
return elf.sh_string + elf32toh(shdr->sh_name);
|
||||
}
|
||||
|
||||
/* Returns a pointer to the first byte of section data. */
|
||||
static inline const char *section_begin(Elf64_Shdr *shdr)
|
||||
{
|
||||
return elf_ptr(char, elf64toh(shdr->sh_offset));
|
||||
}
|
||||
|
||||
/* Find a section by its offset from the beginning of the file. */
|
||||
static inline Elf64_Shdr *section_by_off(Elf64_Off off)
|
||||
{
|
||||
assert_ne(off, 0UL, "%lu");
|
||||
return elf_ptr(Elf64_Shdr, off);
|
||||
}
|
||||
|
||||
/* Find a section by its index. */
|
||||
static inline Elf64_Shdr *section_by_idx(uint16_t idx)
|
||||
{
|
||||
assert_ne(idx, SHN_UNDEF, "%u");
|
||||
return &elf.sh_table[idx];
|
||||
}
|
||||
|
||||
/*
|
||||
* Memory-map the given ELF file, perform sanity checks, and
|
||||
* populate global state.
|
||||
*/
|
||||
static void init_elf(const char *path)
|
||||
{
|
||||
int fd, ret;
|
||||
struct stat stat;
|
||||
|
||||
/* Store path in the global struct for error printing. */
|
||||
elf.path = path;
|
||||
|
||||
/* Open the ELF file. */
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
fatal_perror("Could not open ELF file");
|
||||
|
||||
/* Get status of ELF file to obtain its size. */
|
||||
ret = fstat(fd, &stat);
|
||||
if (ret < 0) {
|
||||
close(fd);
|
||||
fatal_perror("Could not get status of ELF file");
|
||||
}
|
||||
|
||||
/* mmap() the entire ELF file read-only at an arbitrary address. */
|
||||
elf.begin = mmap(0, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (elf.begin == MAP_FAILED) {
|
||||
close(fd);
|
||||
fatal_perror("Could not mmap ELF file");
|
||||
}
|
||||
|
||||
/* mmap() was successful, close the FD. */
|
||||
close(fd);
|
||||
|
||||
/* Get pointer to the ELF header. */
|
||||
assert_ge(stat.st_size, sizeof(*elf.ehdr), "%lu");
|
||||
elf.ehdr = elf_ptr(Elf64_Ehdr, 0);
|
||||
|
||||
/* Check the ELF magic. */
|
||||
assert_eq(elf.ehdr->e_ident[EI_MAG0], ELFMAG0, "0x%x");
|
||||
assert_eq(elf.ehdr->e_ident[EI_MAG1], ELFMAG1, "0x%x");
|
||||
assert_eq(elf.ehdr->e_ident[EI_MAG2], ELFMAG2, "0x%x");
|
||||
assert_eq(elf.ehdr->e_ident[EI_MAG3], ELFMAG3, "0x%x");
|
||||
|
||||
/* Sanity check that this is an ELF64 relocatable object for AArch64. */
|
||||
assert_eq(elf.ehdr->e_ident[EI_CLASS], ELFCLASS64, "%u");
|
||||
assert_eq(elf.ehdr->e_ident[EI_DATA], ELFENDIAN, "%u");
|
||||
assert_eq(elf16toh(elf.ehdr->e_type), ET_REL, "%u");
|
||||
assert_eq(elf16toh(elf.ehdr->e_machine), EM_AARCH64, "%u");
|
||||
|
||||
/* Populate fields of the global struct. */
|
||||
elf.sh_table = section_by_off(elf64toh(elf.ehdr->e_shoff));
|
||||
elf.sh_string = section_begin(section_by_idx(elf16toh(elf.ehdr->e_shstrndx)));
|
||||
}
|
||||
|
||||
/* Print the prologue of the output ASM file. */
|
||||
static void emit_prologue(void)
|
||||
{
|
||||
printf(".data\n"
|
||||
".pushsection " HYP_RELOC_SECTION ", \"a\"\n");
|
||||
}
|
||||
|
||||
/* Print ASM statements needed as a prologue to a processed hyp section. */
|
||||
static void emit_section_prologue(const char *sh_orig_name)
|
||||
{
|
||||
/* Declare the hyp section symbol. */
|
||||
printf(".global %s%s\n", HYP_SECTION_SYMBOL_PREFIX, sh_orig_name);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print ASM statements to create a hyp relocation entry for a given
|
||||
* R_AARCH64_ABS64 relocation.
|
||||
*
|
||||
* The linker of vmlinux will populate the position given by `rela` with
|
||||
* an absolute 64-bit kernel VA. If the kernel is relocatable, it will
|
||||
* also generate a dynamic relocation entry so that the kernel can shift
|
||||
* the address at runtime for KASLR.
|
||||
*
|
||||
* Emit a 32-bit offset from the current address to the position given
|
||||
* by `rela`. This way the kernel can iterate over all kernel VAs used
|
||||
* by hyp at runtime and convert them to hyp VAs. However, that offset
|
||||
* will not be known until linking of `vmlinux`, so emit a PREL32
|
||||
* relocation referencing a symbol that the hyp linker script put at
|
||||
* the beginning of the relocated section + the offset from `rela`.
|
||||
*/
|
||||
static void emit_rela_abs64(Elf64_Rela *rela, const char *sh_orig_name)
|
||||
{
|
||||
/* Offset of this reloc from the beginning of HYP_RELOC_SECTION. */
|
||||
static size_t reloc_offset;
|
||||
|
||||
/* Create storage for the 32-bit offset. */
|
||||
printf(".word 0\n");
|
||||
|
||||
/*
|
||||
* Create a PREL32 relocation which instructs the linker of `vmlinux`
|
||||
* to insert offset to position <base> + <offset>, where <base> is
|
||||
* a symbol at the beginning of the relocated section, and <offset>
|
||||
* is `rela->r_offset`.
|
||||
*/
|
||||
printf(".reloc %lu, R_AARCH64_PREL32, %s%s + 0x%lx\n",
|
||||
reloc_offset, HYP_SECTION_SYMBOL_PREFIX, sh_orig_name,
|
||||
elf64toh(rela->r_offset));
|
||||
|
||||
reloc_offset += 4;
|
||||
}
|
||||
|
||||
/* Print the epilogue of the output ASM file. */
|
||||
static void emit_epilogue(void)
|
||||
{
|
||||
printf(".popsection\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over all RELA relocations in a given section and emit
|
||||
* hyp relocation data for all absolute addresses in hyp code/data.
|
||||
*
|
||||
* Static relocations that generate PC-relative-addressing are ignored.
|
||||
* Failure is reported for unexpected relocation types.
|
||||
*/
|
||||
static void emit_rela_section(Elf64_Shdr *sh_rela)
|
||||
{
|
||||
Elf64_Shdr *sh_orig = &elf.sh_table[elf32toh(sh_rela->sh_info)];
|
||||
const char *sh_orig_name = section_name(sh_orig);
|
||||
Elf64_Rela *rela;
|
||||
|
||||
/* Skip all non-hyp sections. */
|
||||
if (!starts_with(sh_orig_name, HYP_SECTION_PREFIX))
|
||||
return;
|
||||
|
||||
emit_section_prologue(sh_orig_name);
|
||||
|
||||
for_each_rela(sh_rela, rela) {
|
||||
uint32_t type = (uint32_t)elf64toh(rela->r_info);
|
||||
|
||||
/* Check that rela points inside the relocated section. */
|
||||
assert_lt(elf64toh(rela->r_offset), elf64toh(sh_orig->sh_size), "0x%lx");
|
||||
|
||||
switch (type) {
|
||||
/*
|
||||
* Data relocations to generate absolute addressing.
|
||||
* Emit a hyp relocation.
|
||||
*/
|
||||
case R_AARCH64_ABS64:
|
||||
emit_rela_abs64(rela, sh_orig_name);
|
||||
break;
|
||||
/* Allow relocations to generate PC-relative addressing. */
|
||||
case R_AARCH64_LD_PREL_LO19:
|
||||
case R_AARCH64_ADR_PREL_LO21:
|
||||
case R_AARCH64_ADR_PREL_PG_HI21:
|
||||
case R_AARCH64_ADR_PREL_PG_HI21_NC:
|
||||
case R_AARCH64_ADD_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST8_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST16_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST32_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST64_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST128_ABS_LO12_NC:
|
||||
break;
|
||||
/* Allow relative relocations for control-flow instructions. */
|
||||
case R_AARCH64_TSTBR14:
|
||||
case R_AARCH64_CONDBR19:
|
||||
case R_AARCH64_JUMP26:
|
||||
case R_AARCH64_CALL26:
|
||||
break;
|
||||
/* Allow group relocations to create PC-relative offset inline. */
|
||||
case R_AARCH64_MOVW_PREL_G0:
|
||||
case R_AARCH64_MOVW_PREL_G0_NC:
|
||||
case R_AARCH64_MOVW_PREL_G1:
|
||||
case R_AARCH64_MOVW_PREL_G1_NC:
|
||||
case R_AARCH64_MOVW_PREL_G2:
|
||||
case R_AARCH64_MOVW_PREL_G2_NC:
|
||||
case R_AARCH64_MOVW_PREL_G3:
|
||||
break;
|
||||
default:
|
||||
fatal_error("Unexpected RELA type %u", type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Iterate over all sections and emit hyp relocation data for RELA sections. */
|
||||
static void emit_all_relocs(void)
|
||||
{
|
||||
Elf64_Shdr *shdr;
|
||||
|
||||
for_each_section(shdr) {
|
||||
switch (elf32toh(shdr->sh_type)) {
|
||||
case SHT_REL:
|
||||
fatal_error("Unexpected SHT_REL section \"%s\"",
|
||||
section_name(shdr));
|
||||
case SHT_RELA:
|
||||
emit_rela_section(shdr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "Usage: %s <elf_input>\n", argv[0]);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
init_elf(argv[1]);
|
||||
|
||||
emit_prologue();
|
||||
emit_all_relocs();
|
||||
emit_epilogue();
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
@ -74,27 +74,28 @@ SYM_FUNC_END(__host_enter)
|
||||
* void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
|
||||
*/
|
||||
SYM_FUNC_START(__hyp_do_panic)
|
||||
/* Load the format arguments into x1-7 */
|
||||
mov x6, x3
|
||||
get_vcpu_ptr x7, x3
|
||||
|
||||
mrs x3, esr_el2
|
||||
mrs x4, far_el2
|
||||
mrs x5, hpfar_el2
|
||||
|
||||
/* Prepare and exit to the host's panic function. */
|
||||
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
|
||||
PSR_MODE_EL1h)
|
||||
msr spsr_el2, lr
|
||||
ldr lr, =panic
|
||||
hyp_kimg_va lr, x6
|
||||
msr elr_el2, lr
|
||||
|
||||
/*
|
||||
* Set the panic format string and enter the host, conditionally
|
||||
* restoring the host context.
|
||||
*/
|
||||
/* Set the panic format string. Use the, now free, LR as scratch. */
|
||||
ldr lr, =__hyp_panic_string
|
||||
hyp_kimg_va lr, x6
|
||||
|
||||
/* Load the format arguments into x1-7. */
|
||||
mov x6, x3
|
||||
get_vcpu_ptr x7, x3
|
||||
mrs x3, esr_el2
|
||||
mrs x4, far_el2
|
||||
mrs x5, hpfar_el2
|
||||
|
||||
/* Enter the host, conditionally restoring the host context. */
|
||||
cmp x0, xzr
|
||||
ldr x0, =__hyp_panic_string
|
||||
mov x0, lr
|
||||
b.eq __host_enter_without_restoring
|
||||
b __host_enter_for_panic
|
||||
SYM_FUNC_END(__hyp_do_panic)
|
||||
@ -124,7 +125,7 @@ SYM_FUNC_END(__hyp_do_panic)
|
||||
* Preserve x0-x4, which may contain stub parameters.
|
||||
*/
|
||||
ldr x5, =__kvm_handle_stub_hvc
|
||||
kimg_pa x5, x6
|
||||
hyp_pa x5, x6
|
||||
br x5
|
||||
.L__vect_end\@:
|
||||
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
|
||||
|
@ -18,7 +18,7 @@
|
||||
#include <asm/virt.h>
|
||||
|
||||
.text
|
||||
.pushsection .hyp.idmap.text, "ax"
|
||||
.pushsection .idmap.text, "ax"
|
||||
|
||||
.align 11
|
||||
|
||||
@ -57,17 +57,10 @@ __do_hyp_init:
|
||||
cmp x0, #HVC_STUB_HCALL_NR
|
||||
b.lo __kvm_handle_stub_hvc
|
||||
|
||||
// We only actively check bits [24:31], and everything
|
||||
// else has to be zero, which we check at build time.
|
||||
#if (KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) & 0xFFFFFFFF00FFFFFF)
|
||||
#error Unexpected __KVM_HOST_SMCCC_FUNC___kvm_hyp_init value
|
||||
#endif
|
||||
mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init)
|
||||
cmp x0, x3
|
||||
b.eq 1f
|
||||
|
||||
ror x0, x0, #24
|
||||
eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 24) & 0xF)
|
||||
ror x0, x0, #4
|
||||
eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 28) & 0xF)
|
||||
cbz x0, 1f
|
||||
mov x0, #SMCCC_RET_NOT_SUPPORTED
|
||||
eret
|
||||
|
||||
@ -141,7 +134,6 @@ alternative_else_nop_endif
|
||||
|
||||
/* Set the host vector */
|
||||
ldr x0, =__kvm_hyp_host_vector
|
||||
kimg_hyp_va x0, x1
|
||||
msr vbar_el2, x0
|
||||
|
||||
ret
|
||||
@ -200,7 +192,6 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
|
||||
/* Leave idmap. */
|
||||
mov x0, x29
|
||||
ldr x1, =kvm_host_psci_cpu_entry
|
||||
kimg_hyp_va x1, x2
|
||||
br x1
|
||||
SYM_CODE_END(__kvm_hyp_init_cpu)
|
||||
|
||||
|
@ -108,9 +108,9 @@ static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
|
||||
|
||||
typedef void (*hcall_t)(struct kvm_cpu_context *);
|
||||
|
||||
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x)
|
||||
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
|
||||
|
||||
static const hcall_t *host_hcall[] = {
|
||||
static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__kvm_vcpu_run),
|
||||
HANDLE_FUNC(__kvm_flush_vm_context),
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
|
||||
@ -130,7 +130,6 @@ static const hcall_t *host_hcall[] = {
|
||||
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned long, id, host_ctxt, 0);
|
||||
const hcall_t *kfn;
|
||||
hcall_t hfn;
|
||||
|
||||
id -= KVM_HOST_SMCCC_ID(0);
|
||||
@ -138,13 +137,11 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
if (unlikely(id >= ARRAY_SIZE(host_hcall)))
|
||||
goto inval;
|
||||
|
||||
kfn = host_hcall[id];
|
||||
if (unlikely(!kfn))
|
||||
hfn = host_hcall[id];
|
||||
if (unlikely(!hfn))
|
||||
goto inval;
|
||||
|
||||
cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
|
||||
|
||||
hfn = kimg_fn_hyp_va(kfn);
|
||||
hfn(host_ctxt);
|
||||
|
||||
return;
|
||||
|
@ -33,8 +33,8 @@ unsigned long __hyp_per_cpu_offset(unsigned int cpu)
|
||||
if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
|
||||
hyp_panic();
|
||||
|
||||
cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
|
||||
cpu_base_array = (unsigned long *)&kvm_arm_hyp_percpu_base;
|
||||
this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
|
||||
elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start);
|
||||
elf_base = (unsigned long)&__per_cpu_start;
|
||||
return this_cpu_base - elf_base;
|
||||
}
|
||||
|
@ -12,14 +12,17 @@
|
||||
#include <asm/memory.h>
|
||||
|
||||
SECTIONS {
|
||||
HYP_SECTION(.idmap.text)
|
||||
HYP_SECTION(.text)
|
||||
HYP_SECTION(.data..ro_after_init)
|
||||
HYP_SECTION(.rodata)
|
||||
|
||||
/*
|
||||
* .hyp..data..percpu needs to be page aligned to maintain the same
|
||||
* alignment for when linking into vmlinux.
|
||||
*/
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
HYP_SECTION_NAME(.data..percpu) : {
|
||||
BEGIN_HYP_SECTION(.data..percpu)
|
||||
PERCPU_INPUT(L1_CACHE_BYTES)
|
||||
}
|
||||
HYP_SECTION(.data..ro_after_init)
|
||||
END_HYP_SECTION
|
||||
}
|
||||
|
@ -128,8 +128,8 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
|
||||
if (cpu_id == INVALID_CPU_ID)
|
||||
return PSCI_RET_INVALID_PARAMS;
|
||||
|
||||
boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id);
|
||||
init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id);
|
||||
boot_args = per_cpu_ptr(&cpu_on_args, cpu_id);
|
||||
init_params = per_cpu_ptr(&kvm_init_params, cpu_id);
|
||||
|
||||
/* Check if the target CPU is already being booted. */
|
||||
if (!try_acquire_boot_args(boot_args))
|
||||
@ -140,7 +140,7 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
|
||||
wmb();
|
||||
|
||||
ret = psci_call(func_id, mpidr,
|
||||
__hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)),
|
||||
__hyp_pa(&kvm_hyp_cpu_entry),
|
||||
__hyp_pa(init_params));
|
||||
|
||||
/* If successful, the lock will be released by the target CPU. */
|
||||
@ -159,8 +159,8 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
|
||||
struct psci_boot_args *boot_args;
|
||||
struct kvm_nvhe_init_params *init_params;
|
||||
|
||||
boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
|
||||
init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
|
||||
boot_args = this_cpu_ptr(&suspend_args);
|
||||
init_params = this_cpu_ptr(&kvm_init_params);
|
||||
|
||||
/*
|
||||
* No need to acquire a lock before writing to boot_args because a core
|
||||
@ -174,7 +174,7 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
|
||||
* point if it is a deep sleep state.
|
||||
*/
|
||||
return psci_call(func_id, power_state,
|
||||
__hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
|
||||
__hyp_pa(&kvm_hyp_cpu_resume),
|
||||
__hyp_pa(init_params));
|
||||
}
|
||||
|
||||
@ -186,8 +186,8 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
|
||||
struct psci_boot_args *boot_args;
|
||||
struct kvm_nvhe_init_params *init_params;
|
||||
|
||||
boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
|
||||
init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
|
||||
boot_args = this_cpu_ptr(&suspend_args);
|
||||
init_params = this_cpu_ptr(&kvm_init_params);
|
||||
|
||||
/*
|
||||
* No need to acquire a lock before writing to boot_args because a core
|
||||
@ -198,7 +198,7 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
|
||||
|
||||
/* Will only return on error. */
|
||||
return psci_call(func_id,
|
||||
__hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
|
||||
__hyp_pa(&kvm_hyp_cpu_resume),
|
||||
__hyp_pa(init_params), 0);
|
||||
}
|
||||
|
||||
@ -207,12 +207,12 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
|
||||
struct psci_boot_args *boot_args;
|
||||
struct kvm_cpu_context *host_ctxt;
|
||||
|
||||
host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt;
|
||||
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
|
||||
|
||||
if (is_cpu_on)
|
||||
boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args));
|
||||
boot_args = this_cpu_ptr(&cpu_on_args);
|
||||
else
|
||||
boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
|
||||
boot_args = this_cpu_ptr(&suspend_args);
|
||||
|
||||
cpu_reg(host_ctxt, 0) = boot_args->r0;
|
||||
write_sysreg_el2(boot_args->pc, SYS_ELR);
|
||||
|
@ -45,6 +45,10 @@
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
|
||||
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
|
||||
KVM_PTE_LEAF_ATTR_HI_S2_XN)
|
||||
|
||||
struct kvm_pgtable_walk_data {
|
||||
struct kvm_pgtable *pgt;
|
||||
struct kvm_pgtable_walker *walker;
|
||||
@ -170,10 +174,9 @@ static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
|
||||
smp_store_release(ptep, pte);
|
||||
}
|
||||
|
||||
static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
|
||||
u32 level)
|
||||
static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
|
||||
{
|
||||
kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa);
|
||||
kvm_pte_t pte = kvm_phys_to_pte(pa);
|
||||
u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
|
||||
KVM_PTE_TYPE_BLOCK;
|
||||
|
||||
@ -181,12 +184,7 @@ static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
|
||||
pte |= FIELD_PREP(KVM_PTE_TYPE, type);
|
||||
pte |= KVM_PTE_VALID;
|
||||
|
||||
/* Tolerate KVM recreating the exact same mapping. */
|
||||
if (kvm_pte_valid(old))
|
||||
return old == pte;
|
||||
|
||||
smp_store_release(ptep, pte);
|
||||
return true;
|
||||
return pte;
|
||||
}
|
||||
|
||||
static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
|
||||
@ -341,12 +339,17 @@ static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
|
||||
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep, struct hyp_map_data *data)
|
||||
{
|
||||
kvm_pte_t new, old = *ptep;
|
||||
u64 granule = kvm_granule_size(level), phys = data->phys;
|
||||
|
||||
if (!kvm_block_mapping_supported(addr, end, phys, level))
|
||||
return false;
|
||||
|
||||
WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
|
||||
/* Tolerate KVM recreating the exact same mapping */
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
|
||||
if (old != new && !WARN_ON(kvm_pte_valid(old)))
|
||||
smp_store_release(ptep, new);
|
||||
|
||||
data->phys += granule;
|
||||
return true;
|
||||
}
|
||||
@ -461,34 +464,41 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
struct stage2_map_data *data)
|
||||
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
kvm_pte_t new, old = *ptep;
|
||||
u64 granule = kvm_granule_size(level), phys = data->phys;
|
||||
struct page *page = virt_to_page(ptep);
|
||||
|
||||
if (!kvm_block_mapping_supported(addr, end, phys, level))
|
||||
return false;
|
||||
return -E2BIG;
|
||||
|
||||
/*
|
||||
* If the PTE was already valid, drop the refcount on the table
|
||||
* early, as it will be bumped-up again in stage2_map_walk_leaf().
|
||||
* This ensures that the refcount stays constant across a valid to
|
||||
* valid PTE update.
|
||||
*/
|
||||
if (kvm_pte_valid(*ptep))
|
||||
put_page(virt_to_page(ptep));
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
|
||||
if (kvm_pte_valid(old)) {
|
||||
/*
|
||||
* Skip updating the PTE if we are trying to recreate the exact
|
||||
* same mapping or only change the access permissions. Instead,
|
||||
* the vCPU will exit one more time from guest if still needed
|
||||
* and then go through the path of relaxing permissions.
|
||||
*/
|
||||
if (!((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)))
|
||||
return -EAGAIN;
|
||||
|
||||
if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
|
||||
goto out;
|
||||
/*
|
||||
* There's an existing different valid leaf entry, so perform
|
||||
* break-before-make.
|
||||
*/
|
||||
kvm_set_invalid_pte(ptep);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
/* There's an existing valid leaf entry, so perform break-before-make */
|
||||
kvm_set_invalid_pte(ptep);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
|
||||
kvm_set_valid_leaf_pte(ptep, phys, data->attr, level);
|
||||
out:
|
||||
smp_store_release(ptep, new);
|
||||
get_page(page);
|
||||
data->phys += granule;
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
|
||||
@ -516,6 +526,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
|
||||
static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
int ret;
|
||||
kvm_pte_t *childp, pte = *ptep;
|
||||
struct page *page = virt_to_page(ptep);
|
||||
|
||||
@ -526,8 +537,9 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
|
||||
goto out_get_page;
|
||||
ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
|
||||
if (ret != -E2BIG)
|
||||
return ret;
|
||||
|
||||
if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
return -EINVAL;
|
||||
@ -551,9 +563,8 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
}
|
||||
|
||||
kvm_set_table_pte(ptep, childp);
|
||||
|
||||
out_get_page:
|
||||
get_page(page);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -64,7 +64,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
rd = kvm_vcpu_dabt_get_rd(vcpu);
|
||||
addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
|
||||
addr = kvm_vgic_global_state.vcpu_hyp_va;
|
||||
addr += fault_ipa - vgic->vgic_cpu_base;
|
||||
|
||||
if (kvm_vcpu_dabt_iswrite(vcpu)) {
|
||||
|
@@ -71,6 +71,12 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
		if (gpa != GPA_INVALID)
			val = gpa;
		break;
	case ARM_SMCCC_TRNG_VERSION:
	case ARM_SMCCC_TRNG_FEATURES:
	case ARM_SMCCC_TRNG_GET_UUID:
	case ARM_SMCCC_TRNG_RND32:
	case ARM_SMCCC_TRNG_RND64:
		return kvm_trng_call(vcpu);
	default:
		return kvm_psci_call(vcpu);
	}

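For reference, the ARM_SMCCC_TRNG_* function IDs matched above come from the Arm True Random Number Generator firmware interface (DEN0098) and are defined by this series in <linux/arm-smccc.h>. A hedged sketch of the expected numeric values, quoted from the specification rather than from this hunk:

/* Sketch per the TRNG firmware interface spec, not the kernel header. */
#define ARM_SMCCC_TRNG_VERSION		0x84000050
#define ARM_SMCCC_TRNG_FEATURES		0x84000051
#define ARM_SMCCC_TRNG_GET_UUID		0x84000052
#define ARM_SMCCC_TRNG_RND32		0x84000053
#define ARM_SMCCC_TRNG_RND64		0xc4000053
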
@ -879,11 +879,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
if (vma_pagesize == PAGE_SIZE && !force_pte)
|
||||
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
|
||||
&pfn, &fault_ipa);
|
||||
if (writable) {
|
||||
if (writable)
|
||||
prot |= KVM_PGTABLE_PROT_W;
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
mark_page_dirty(kvm, gfn);
|
||||
}
|
||||
|
||||
if (fault_status != FSC_PERM && !device)
|
||||
clean_dcache_guest_page(pfn, vma_pagesize);
|
||||
@ -911,11 +908,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
memcache);
|
||||
}
|
||||
|
||||
/* Mark the page dirty only if the fault is handled successfully */
|
||||
if (writable && !ret) {
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
mark_page_dirty(kvm, gfn);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
kvm_set_pfn_accessed(pfn);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
return ret;
|
||||
return ret != -EAGAIN ? ret : 0;
|
||||
}
|
||||
|
||||
/* Resolve the access fault by making the page young again. */
|
||||
|
@ -23,11 +23,11 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
|
||||
static u32 kvm_pmu_event_mask(struct kvm *kvm)
|
||||
{
|
||||
switch (kvm->arch.pmuver) {
|
||||
case 1: /* ARMv8.0 */
|
||||
case ID_AA64DFR0_PMUVER_8_0:
|
||||
return GENMASK(9, 0);
|
||||
case 4: /* ARMv8.1 */
|
||||
case 5: /* ARMv8.4 */
|
||||
case 6: /* ARMv8.5 */
|
||||
case ID_AA64DFR0_PMUVER_8_1:
|
||||
case ID_AA64DFR0_PMUVER_8_4:
|
||||
case ID_AA64DFR0_PMUVER_8_5:
|
||||
return GENMASK(15, 0);
|
||||
default: /* Shouldn't be here, just for sanity */
|
||||
WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
|
||||
@ -795,6 +795,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
|
||||
base = 0;
|
||||
} else {
|
||||
val = read_sysreg(pmceid1_el0);
|
||||
/*
|
||||
* Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
|
||||
* as RAZ
|
||||
*/
|
||||
if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
|
||||
val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
|
||||
base = 32;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
* Christoffer Dall <c.dall@virtualopensystems.com>
|
||||
*/
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/mm.h>
|
||||
@ -700,14 +701,18 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
u64 pmceid;
|
||||
u64 pmceid, mask, shift;
|
||||
|
||||
BUG_ON(p->is_write);
|
||||
|
||||
if (pmu_access_el0_disabled(vcpu))
|
||||
return false;
|
||||
|
||||
get_access_mask(r, &mask, &shift);
|
||||
|
||||
pmceid = kvm_pmu_get_pmceid(vcpu, (p->Op2 & 1));
|
||||
pmceid &= mask;
|
||||
pmceid >>= shift;
|
||||
|
||||
p->regval = pmceid;
|
||||
|
||||
@ -1021,6 +1026,8 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
|
||||
return true;
|
||||
}
|
||||
|
||||
#define FEATURE(x) (GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT))
|
||||
|
||||
/* Read a sanitised cpufeature ID register by sys_reg_desc */
|
||||
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_desc const *r, bool raz)
|
||||
@ -1028,36 +1035,41 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
||||
u32 id = reg_to_encoding(r);
|
||||
u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
|
||||
|
||||
if (id == SYS_ID_AA64PFR0_EL1) {
|
||||
switch (id) {
|
||||
case SYS_ID_AA64PFR0_EL1:
|
||||
if (!vcpu_has_sve(vcpu))
|
||||
val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
|
||||
val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT);
|
||||
val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT);
|
||||
val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT);
|
||||
val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT);
|
||||
val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT);
|
||||
} else if (id == SYS_ID_AA64PFR1_EL1) {
|
||||
val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT);
|
||||
} else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) {
|
||||
val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) |
|
||||
(0xfUL << ID_AA64ISAR1_API_SHIFT) |
|
||||
(0xfUL << ID_AA64ISAR1_GPA_SHIFT) |
|
||||
(0xfUL << ID_AA64ISAR1_GPI_SHIFT));
|
||||
} else if (id == SYS_ID_AA64DFR0_EL1) {
|
||||
u64 cap = 0;
|
||||
|
||||
/* Limit guests to PMUv3 for ARMv8.1 */
|
||||
if (kvm_vcpu_has_pmu(vcpu))
|
||||
cap = ID_AA64DFR0_PMUVER_8_1;
|
||||
|
||||
val &= ~FEATURE(ID_AA64PFR0_SVE);
|
||||
val &= ~FEATURE(ID_AA64PFR0_AMU);
|
||||
val &= ~FEATURE(ID_AA64PFR0_CSV2);
|
||||
val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
|
||||
val &= ~FEATURE(ID_AA64PFR0_CSV3);
|
||||
val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
|
||||
break;
|
||||
case SYS_ID_AA64PFR1_EL1:
|
||||
val &= ~FEATURE(ID_AA64PFR1_MTE);
|
||||
break;
|
||||
case SYS_ID_AA64ISAR1_EL1:
|
||||
if (!vcpu_has_ptrauth(vcpu))
|
||||
val &= ~(FEATURE(ID_AA64ISAR1_APA) |
|
||||
FEATURE(ID_AA64ISAR1_API) |
|
||||
FEATURE(ID_AA64ISAR1_GPA) |
|
||||
FEATURE(ID_AA64ISAR1_GPI));
|
||||
break;
|
||||
case SYS_ID_AA64DFR0_EL1:
|
||||
/* Limit debug to ARMv8.0 */
|
||||
val &= ~FEATURE(ID_AA64DFR0_DEBUGVER);
|
||||
val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 6);
|
||||
/* Limit guests to PMUv3 for ARMv8.4 */
|
||||
val = cpuid_feature_cap_perfmon_field(val,
|
||||
ID_AA64DFR0_PMUVER_SHIFT,
|
||||
cap);
|
||||
} else if (id == SYS_ID_DFR0_EL1) {
|
||||
/* Limit guests to PMUv3 for ARMv8.1 */
|
||||
ID_AA64DFR0_PMUVER_SHIFT,
|
||||
kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0);
|
||||
break;
|
||||
case SYS_ID_DFR0_EL1:
|
||||
/* Limit guests to PMUv3 for ARMv8.4 */
|
||||
val = cpuid_feature_cap_perfmon_field(val,
|
||||
ID_DFR0_PERFMON_SHIFT,
|
||||
ID_DFR0_PERFMON_8_1);
|
||||
ID_DFR0_PERFMON_SHIFT,
|
||||
kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0);
|
||||
break;
|
||||
}
|
||||
|
||||
return val;
|
||||
@ -1493,6 +1505,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
.access = access_pminten, .reg = PMINTENSET_EL1 },
|
||||
{ PMU_SYS_REG(SYS_PMINTENCLR_EL1),
|
||||
.access = access_pminten, .reg = PMINTENSET_EL1 },
|
||||
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
|
||||
|
||||
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
|
||||
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
|
||||
@ -1720,7 +1733,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
|
||||
};
|
||||
|
||||
static bool trap_dbgidr(struct kvm_vcpu *vcpu,
|
||||
static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
@ -1734,7 +1747,7 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
|
||||
p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
|
||||
(((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
|
||||
(((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20)
|
||||
| (6 << 16) | (el3 << 14) | (el3 << 12));
|
||||
| (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -1767,8 +1780,8 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
|
||||
* guest. Revisit this one day, would this principle change.
|
||||
*/
|
||||
static const struct sys_reg_desc cp14_regs[] = {
|
||||
/* DBGIDR */
|
||||
{ Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
|
||||
/* DBGDIDR */
|
||||
{ Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgdidr },
|
||||
/* DBGDTRRXext */
|
||||
{ Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
|
||||
|
||||
@ -1918,8 +1931,8 @@ static const struct sys_reg_desc cp15_regs[] = {
|
||||
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
|
||||
{ Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
|
||||
{ Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
|
||||
{ Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
|
||||
{ Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
|
||||
{ AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
|
||||
{ AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
|
||||
{ Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
|
||||
{ Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
|
||||
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
|
||||
@ -1927,6 +1940,10 @@ static const struct sys_reg_desc cp15_regs[] = {
|
||||
{ Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
|
||||
{ Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
|
||||
{ Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
|
||||
{ AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 4), access_pmceid },
|
||||
{ AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 5), access_pmceid },
|
||||
/* PMMIR */
|
||||
{ Op1( 0), CRn( 9), CRm(14), Op2( 6), trap_raz_wi },
|
||||
|
||||
/* PRRR/MAIR0 */
|
||||
{ AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 },
|
||||
|
arch/arm64/kvm/trng.c (new file, 85 lines)
@@ -0,0 +1,85 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2020 Arm Ltd.

#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>

#include <asm/kvm_emulate.h>

#include <kvm/arm_hypercalls.h>

#define ARM_SMCCC_TRNG_VERSION_1_0	0x10000UL

/* Those values are deliberately separate from the generic SMCCC definitions. */
#define TRNG_SUCCESS			0UL
#define TRNG_NOT_SUPPORTED		((unsigned long)-1)
#define TRNG_INVALID_PARAMETER		((unsigned long)-2)
#define TRNG_NO_ENTROPY			((unsigned long)-3)

#define TRNG_MAX_BITS64			192

static const uuid_t arm_smc_trng_uuid __aligned(4) = UUID_INIT(
	0x0d21e000, 0x4384, 0x11eb, 0x80, 0x70, 0x52, 0x44, 0x55, 0x4e, 0x5a, 0x4c);

static int kvm_trng_do_rnd(struct kvm_vcpu *vcpu, int size)
{
	DECLARE_BITMAP(bits, TRNG_MAX_BITS64);
	u32 num_bits = smccc_get_arg1(vcpu);
	int i;

	if (num_bits > 3 * size) {
		smccc_set_retval(vcpu, TRNG_INVALID_PARAMETER, 0, 0, 0);
		return 1;
	}

	/* get as many bits as we need to fulfil the request */
	for (i = 0; i < DIV_ROUND_UP(num_bits, BITS_PER_LONG); i++)
		bits[i] = get_random_long();

	bitmap_clear(bits, num_bits, TRNG_MAX_BITS64 - num_bits);

	if (size == 32)
		smccc_set_retval(vcpu, TRNG_SUCCESS, lower_32_bits(bits[1]),
				 upper_32_bits(bits[0]), lower_32_bits(bits[0]));
	else
		smccc_set_retval(vcpu, TRNG_SUCCESS, bits[2], bits[1], bits[0]);

	memzero_explicit(bits, sizeof(bits));
	return 1;
}

int kvm_trng_call(struct kvm_vcpu *vcpu)
{
	const __le32 *u = (__le32 *)arm_smc_trng_uuid.b;
	u32 func_id = smccc_get_function(vcpu);
	unsigned long val = TRNG_NOT_SUPPORTED;
	int size = 64;

	switch (func_id) {
	case ARM_SMCCC_TRNG_VERSION:
		val = ARM_SMCCC_TRNG_VERSION_1_0;
		break;
	case ARM_SMCCC_TRNG_FEATURES:
		switch (smccc_get_arg1(vcpu)) {
		case ARM_SMCCC_TRNG_VERSION:
		case ARM_SMCCC_TRNG_FEATURES:
		case ARM_SMCCC_TRNG_GET_UUID:
		case ARM_SMCCC_TRNG_RND32:
		case ARM_SMCCC_TRNG_RND64:
			val = TRNG_SUCCESS;
		}
		break;
	case ARM_SMCCC_TRNG_GET_UUID:
		smccc_set_retval(vcpu, le32_to_cpu(u[0]), le32_to_cpu(u[1]),
				 le32_to_cpu(u[2]), le32_to_cpu(u[3]));
		return 1;
	case ARM_SMCCC_TRNG_RND32:
		size = 32;
		fallthrough;
	case ARM_SMCCC_TRNG_RND64:
		return kvm_trng_do_rnd(vcpu, size);
	}

	smccc_set_retval(vcpu, val, 0, 0, 0);
	return 1;
}

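To show how a guest is expected to use the handler above, here is a hedged guest-side sketch (not part of this patch) that asks for 64 bits of entropy through the standard SMCCC conduit. arm_smccc_1_1_invoke() and struct arm_smccc_res come from <linux/arm-smccc.h>; the register layout mirrors kvm_trng_do_rnd() above (status in a0, lowest-order bits in a3).

#include <linux/arm-smccc.h>

static int trng_get_u64(u64 *out)
{
	struct arm_smccc_res res;

	arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 64, &res);
	if (res.a0)		/* TRNG_NOT_SUPPORTED, TRNG_NO_ENTROPY, ... */
		return -EIO;

	*out = res.a3;		/* bits[0] as set by the handler above */
	return 0;
}
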
@@ -81,6 +81,34 @@ __init void kvm_compute_layout(void)
	init_hyp_physvirt_offset();
}

/*
 * The .hyp.reloc ELF section contains a list of kimg positions that
 * contains kimg VAs but will be accessed only in hyp execution context.
 * Convert them to hyp VAs. See gen-hyprel.c for more details.
 */
__init void kvm_apply_hyp_relocations(void)
{
	int32_t *rel;
	int32_t *begin = (int32_t *)__hyp_reloc_begin;
	int32_t *end = (int32_t *)__hyp_reloc_end;

	for (rel = begin; rel < end; ++rel) {
		uintptr_t *ptr, kimg_va;

		/*
		 * Each entry contains a 32-bit relative offset from itself
		 * to a kimg VA position.
		 */
		ptr = (uintptr_t *)lm_alias((char *)rel + *rel);

		/* Read the kimg VA value at the relocation address. */
		kimg_va = *ptr;

		/* Convert to hyp VA and store back to the relocation address. */
		*ptr = __early_kern_hyp_va((uintptr_t)lm_alias(kimg_va));
	}
}

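Conceptually, each .hyp.reloc entry e satisfies target = (char *)&e + e, and *target holds a kernel-image VA to be rewritten as a hyp VA. A tiny hedged C model of that invariant, with hypothetical types that are not the kernel's own:

/* Illustrative model of a .hyp.reloc entry; not the kernel's definition. */
struct hyp_reloc_entry {
	int32_t offset;		/* from this entry to the word to patch */
};

static inline uintptr_t *hyp_reloc_target(struct hyp_reloc_entry *e)
{
	return (uintptr_t *)((char *)e + e->offset);
}
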
static u32 compute_instruction(int n, u32 rd, u32 rn)
|
||||
{
|
||||
u32 insn = AARCH64_BREAK_FAULT;
|
||||
@ -255,12 +283,6 @@ static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst
|
||||
*updptr++ = cpu_to_le32(insn);
|
||||
}
|
||||
|
||||
void kvm_update_kimg_phys_offset(struct alt_instr *alt,
|
||||
__le32 *origptr, __le32 *updptr, int nr_inst)
|
||||
{
|
||||
generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst);
|
||||
}
|
||||
|
||||
void kvm_get_kimage_voffset(struct alt_instr *alt,
|
||||
__le32 *origptr, __le32 *updptr, int nr_inst)
|
||||
{
|
||||
|
@ -83,7 +83,6 @@
|
||||
|
||||
|
||||
#define KVM_MAX_VCPUS 16
|
||||
#define KVM_USER_MEM_SLOTS 16
|
||||
/* memory slots that does not exposed to userspace */
|
||||
#define KVM_PRIVATE_MEM_SLOTS 0
|
||||
|
||||
|
@ -10,7 +10,6 @@
|
||||
#define _ASM_SPINLOCK_H
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/qrwlock.h>
|
||||
|
||||
#include <asm-generic/qspinlock_types.h>
|
||||
|
||||
@ -27,5 +26,6 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
|
||||
}
|
||||
|
||||
#include <asm/qspinlock.h>
|
||||
#include <asm/qrwlock.h>
|
||||
|
||||
#endif /* _ASM_SPINLOCK_H */
|
||||
|
@@ -535,9 +535,12 @@ struct h_cpu_char_result {
	u64 behaviour;
};

/* Register state for entering a nested guest with H_ENTER_NESTED */
/*
 * Register state for entering a nested guest with H_ENTER_NESTED.
 * New member must be added at the end.
 */
struct hv_guest_state {
	u64 version;		/* version of this structure layout */
	u64 version;		/* version of this structure layout, must be first */
	u32 lpid;
	u32 vcpu_token;
	/* These registers are hypervisor privileged (at least for writing) */
@@ -566,10 +569,26 @@ struct hv_guest_state {
	u64 pidr;
	u64 cfar;
	u64 ppr;
	/* Version 1 ends here */
	u64 dawr1;
	u64 dawrx1;
	/* Version 2 ends here */
};

/* Latest version of hv_guest_state structure */
#define HV_GUEST_STATE_VERSION	1
#define HV_GUEST_STATE_VERSION	2

static inline int hv_guest_state_size(unsigned int version)
{
	switch (version) {
	case 1:
		return offsetofend(struct hv_guest_state, ppr);
	case 2:
		return offsetofend(struct hv_guest_state, dawrx1);
	default:
		return -1;
	}
}

/*
 * From the document "H_GetPerformanceCounterInfo Interface" v1.07

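The version/size plumbing in the hvcall.h hunk above is what lets an older L1 keep working against a newer L0: the L1 writes the version it was built for, and the L0 copies only hv_guest_state_size(version) bytes and rejects anything newer than its own HV_GUEST_STATE_VERSION (see the l2_hv.version > HV_GUEST_STATE_VERSION check later in this diff). A hedged L1-side sketch, illustrative only:

/* Hypothetical L1 setup code; fields beyond 'version' elided. */
static void init_l2_hv_state(struct hv_guest_state *hv)
{
	memset(hv, 0, sizeof(*hv));
	hv->version = HV_GUEST_STATE_VERSION;	/* 2 == layout up to dawrx1 */
	/* ... fill lpid, vcpu_token and the register state ... */
}
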
@ -74,16 +74,6 @@ struct kvm_split_mode {
|
||||
u8 do_nap;
|
||||
u8 napped[MAX_SMT_THREADS];
|
||||
struct kvmppc_vcore *vc[MAX_SUBCORES];
|
||||
/* Bits for changing lpcr on P9 */
|
||||
unsigned long lpcr_req;
|
||||
unsigned long lpidr_req;
|
||||
unsigned long host_lpcr;
|
||||
u32 do_set;
|
||||
u32 do_restore;
|
||||
union {
|
||||
u32 allphases;
|
||||
u8 phase[4];
|
||||
} lpcr_sync;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -110,7 +100,6 @@ struct kvmppc_host_state {
|
||||
u8 hwthread_state;
|
||||
u8 host_ipi;
|
||||
u8 ptid; /* thread number within subcore when split */
|
||||
u8 tid; /* thread number within whole core */
|
||||
u8 fake_suspend;
|
||||
struct kvm_vcpu *kvm_vcpu;
|
||||
struct kvmppc_vcore *kvm_vcore;
|
||||
|
@ -28,7 +28,6 @@
|
||||
|
||||
#define KVM_MAX_VCPUS NR_CPUS
|
||||
#define KVM_MAX_VCORES NR_CPUS
|
||||
#define KVM_USER_MEM_SLOTS 512
|
||||
|
||||
#include <asm/cputhreads.h>
|
||||
|
||||
@ -307,6 +306,7 @@ struct kvm_arch {
|
||||
u8 svm_enabled;
|
||||
bool threads_indep;
|
||||
bool nested_enable;
|
||||
bool dawr1_enabled;
|
||||
pgd_t *pgtable;
|
||||
u64 process_table;
|
||||
struct dentry *debugfs_dir;
|
||||
@ -584,8 +584,10 @@ struct kvm_vcpu_arch {
|
||||
u32 ctrl;
|
||||
u32 dabrx;
|
||||
ulong dabr;
|
||||
ulong dawr;
|
||||
ulong dawrx;
|
||||
ulong dawr0;
|
||||
ulong dawrx0;
|
||||
ulong dawr1;
|
||||
ulong dawrx1;
|
||||
ulong ciabr;
|
||||
ulong cfar;
|
||||
ulong ppr;
|
||||
|
@ -314,6 +314,8 @@ struct kvmppc_ops {
|
||||
int size);
|
||||
int (*enable_svm)(struct kvm *kvm);
|
||||
int (*svm_off)(struct kvm *kvm);
|
||||
int (*enable_dawr1)(struct kvm *kvm);
|
||||
bool (*hash_v3_possible)(void);
|
||||
};
|
||||
|
||||
extern struct kvmppc_ops *kvmppc_hv_ops;
|
||||
|
@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char {
|
||||
#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
|
||||
#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
|
||||
#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
|
||||
#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
|
||||
#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
|
||||
|
||||
/* Transactional Memory checkpointed state:
|
||||
* This is all GPRs, all VSX regs and a subset of SPRs
|
||||
|
@ -526,8 +526,10 @@ int main(void)
|
||||
OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
|
||||
OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
|
||||
OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
|
||||
OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr);
|
||||
OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx);
|
||||
OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
|
||||
OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
|
||||
OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1);
|
||||
OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
|
||||
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
|
||||
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
|
||||
OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
|
||||
@ -668,7 +670,6 @@ int main(void)
|
||||
HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
|
||||
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
|
||||
HSTATE_FIELD(HSTATE_PTID, ptid);
|
||||
HSTATE_FIELD(HSTATE_TID, tid);
|
||||
HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
|
||||
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
|
||||
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
|
||||
@ -698,8 +699,6 @@ int main(void)
|
||||
OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
|
||||
OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
|
||||
OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
|
||||
OFFSET(KVM_SPLIT_DO_SET, kvm_split_mode, do_set);
|
||||
OFFSET(KVM_SPLIT_DO_RESTORE, kvm_split_mode, do_restore);
|
||||
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
|
@ -134,7 +134,7 @@ static inline bool nesting_enabled(struct kvm *kvm)
|
||||
}
|
||||
|
||||
/* If set, the threads on each CPU core have to be in the same MMU mode */
|
||||
static bool no_mixing_hpt_and_radix;
|
||||
static bool no_mixing_hpt_and_radix __read_mostly;
|
||||
|
||||
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
|
||||
|
||||
@ -782,8 +782,24 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
|
||||
return H_UNSUPPORTED_FLAG_START;
|
||||
if (value2 & DABRX_HYP)
|
||||
return H_P4;
|
||||
vcpu->arch.dawr = value1;
|
||||
vcpu->arch.dawrx = value2;
|
||||
vcpu->arch.dawr0 = value1;
|
||||
vcpu->arch.dawrx0 = value2;
|
||||
return H_SUCCESS;
|
||||
case H_SET_MODE_RESOURCE_SET_DAWR1:
|
||||
if (!kvmppc_power8_compatible(vcpu))
|
||||
return H_P2;
|
||||
if (!ppc_breakpoint_available())
|
||||
return H_P2;
|
||||
if (!cpu_has_feature(CPU_FTR_DAWR1))
|
||||
return H_P2;
|
||||
if (!vcpu->kvm->arch.dawr1_enabled)
|
||||
return H_FUNCTION;
|
||||
if (mflags)
|
||||
return H_UNSUPPORTED_FLAG_START;
|
||||
if (value2 & DABRX_HYP)
|
||||
return H_P4;
|
||||
vcpu->arch.dawr1 = value1;
|
||||
vcpu->arch.dawrx1 = value2;
|
||||
return H_SUCCESS;
|
||||
case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
|
||||
/* KVM does not support mflags=2 (AIL=2) */
|
||||
@ -1759,10 +1775,16 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
|
||||
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
|
||||
break;
|
||||
case KVM_REG_PPC_DAWR:
|
||||
*val = get_reg_val(id, vcpu->arch.dawr);
|
||||
*val = get_reg_val(id, vcpu->arch.dawr0);
|
||||
break;
|
||||
case KVM_REG_PPC_DAWRX:
|
||||
*val = get_reg_val(id, vcpu->arch.dawrx);
|
||||
*val = get_reg_val(id, vcpu->arch.dawrx0);
|
||||
break;
|
||||
case KVM_REG_PPC_DAWR1:
|
||||
*val = get_reg_val(id, vcpu->arch.dawr1);
|
||||
break;
|
||||
case KVM_REG_PPC_DAWRX1:
|
||||
*val = get_reg_val(id, vcpu->arch.dawrx1);
|
||||
break;
|
||||
case KVM_REG_PPC_CIABR:
|
||||
*val = get_reg_val(id, vcpu->arch.ciabr);
|
||||
@ -1991,10 +2013,16 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
|
||||
vcpu->arch.vcore->vtb = set_reg_val(id, *val);
|
||||
break;
|
||||
case KVM_REG_PPC_DAWR:
|
||||
vcpu->arch.dawr = set_reg_val(id, *val);
|
||||
vcpu->arch.dawr0 = set_reg_val(id, *val);
|
||||
break;
|
||||
case KVM_REG_PPC_DAWRX:
|
||||
vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
|
||||
vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
|
||||
break;
|
||||
case KVM_REG_PPC_DAWR1:
|
||||
vcpu->arch.dawr1 = set_reg_val(id, *val);
|
||||
break;
|
||||
case KVM_REG_PPC_DAWRX1:
|
||||
vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP;
|
||||
break;
|
||||
case KVM_REG_PPC_CIABR:
|
||||
vcpu->arch.ciabr = set_reg_val(id, *val);
|
||||
@ -2862,11 +2890,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
|
||||
if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
|
||||
return false;
|
||||
|
||||
/* Some POWER9 chips require all threads to be in the same MMU mode */
|
||||
if (no_mixing_hpt_and_radix &&
|
||||
kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
|
||||
return false;
|
||||
|
||||
if (n_threads < cip->max_subcore_threads)
|
||||
n_threads = cip->max_subcore_threads;
|
||||
if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
|
||||
@ -2905,6 +2928,9 @@ static void prepare_threads(struct kvmppc_vcore *vc)
|
||||
for_each_runnable_thread(i, vcpu, vc) {
|
||||
if (signal_pending(vcpu->arch.run_task))
|
||||
vcpu->arch.ret = -EINTR;
|
||||
else if (no_mixing_hpt_and_radix &&
|
||||
kvm_is_radix(vc->kvm) != radix_enabled())
|
||||
vcpu->arch.ret = -EINVAL;
|
||||
else if (vcpu->arch.vpa.update_pending ||
|
||||
vcpu->arch.slb_shadow.update_pending ||
|
||||
vcpu->arch.dtl.update_pending)
|
||||
@ -3110,7 +3136,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
int controlled_threads;
|
||||
int trap;
|
||||
bool is_power8;
|
||||
bool hpt_on_radix;
|
||||
|
||||
/*
|
||||
* Remove from the list any threads that have a signal pending
|
||||
@ -3143,11 +3168,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
* this is a HPT guest on a radix host machine where the
|
||||
* CPU threads may not be in different MMU modes.
|
||||
*/
|
||||
hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
|
||||
!kvm_is_radix(vc->kvm);
|
||||
if (((controlled_threads > 1) &&
|
||||
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
|
||||
(hpt_on_radix && vc->kvm->arch.threads_indep)) {
|
||||
if ((controlled_threads > 1) &&
|
||||
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
|
||||
for_each_runnable_thread(i, vcpu, vc) {
|
||||
vcpu->arch.ret = -EBUSY;
|
||||
kvmppc_remove_runnable(vc, vcpu);
|
||||
@ -3215,7 +3237,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
|
||||
&& !cpu_has_feature(CPU_FTR_ARCH_300);
|
||||
|
||||
if (split > 1 || hpt_on_radix) {
|
||||
if (split > 1) {
|
||||
sip = &split_info;
|
||||
memset(&split_info, 0, sizeof(split_info));
|
||||
for (sub = 0; sub < core_info.n_subcores; ++sub)
|
||||
@ -3237,13 +3259,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
split_info.subcore_size = subcore_size;
|
||||
} else {
|
||||
split_info.subcore_size = 1;
|
||||
if (hpt_on_radix) {
|
||||
/* Use the split_info for LPCR/LPIDR changes */
|
||||
split_info.lpcr_req = vc->lpcr;
|
||||
split_info.lpidr_req = vc->kvm->arch.lpid;
|
||||
split_info.host_lpcr = vc->kvm->arch.host_lpcr;
|
||||
split_info.do_set = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* order writes to split_info before kvm_split_mode pointer */
|
||||
@ -3253,7 +3268,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
for (thr = 0; thr < controlled_threads; ++thr) {
|
||||
struct paca_struct *paca = paca_ptrs[pcpu + thr];
|
||||
|
||||
paca->kvm_hstate.tid = thr;
|
||||
paca->kvm_hstate.napping = 0;
|
||||
paca->kvm_hstate.kvm_split_mode = sip;
|
||||
}
|
||||
@ -3327,10 +3341,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
* When doing micro-threading, poke the inactive threads as well.
|
||||
* This gets them to the nap instruction after kvm_do_nap,
|
||||
* which reduces the time taken to unsplit later.
|
||||
* For POWER9 HPT guest on radix host, we need all the secondary
|
||||
* threads woken up so they can do the LPCR/LPIDR change.
|
||||
*/
|
||||
if (cmd_bit || hpt_on_radix) {
|
||||
if (cmd_bit) {
|
||||
split_info.do_nap = 1; /* ask secondaries to nap when done */
|
||||
for (thr = 1; thr < threads_per_subcore; ++thr)
|
||||
if (!(active & (1 << thr)))
|
||||
@ -3391,19 +3403,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
|
||||
cpu_relax();
|
||||
++loops;
|
||||
}
|
||||
} else if (hpt_on_radix) {
|
||||
/* Wait for all threads to have seen final sync */
|
||||
for (thr = 1; thr < controlled_threads; ++thr) {
|
||||
struct paca_struct *paca = paca_ptrs[pcpu + thr];
|
||||
|
||||
while (paca->kvm_hstate.kvm_split_mode) {
|
||||
HMT_low();
|
||||
barrier();
|
||||
}
|
||||
HMT_medium();
|
||||
}
|
||||
split_info.do_nap = 0;
|
||||
}
|
||||
split_info.do_nap = 0;
|
||||
|
||||
kvmppc_set_host_core(pcpu);
|
||||
|
||||
@ -3449,10 +3450,17 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
|
||||
int trap;
|
||||
unsigned long host_hfscr = mfspr(SPRN_HFSCR);
|
||||
unsigned long host_ciabr = mfspr(SPRN_CIABR);
|
||||
unsigned long host_dawr = mfspr(SPRN_DAWR0);
|
||||
unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
|
||||
unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
|
||||
unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
|
||||
unsigned long host_psscr = mfspr(SPRN_PSSCR);
|
||||
unsigned long host_pidr = mfspr(SPRN_PID);
|
||||
unsigned long host_dawr1 = 0;
|
||||
unsigned long host_dawrx1 = 0;
|
||||
|
||||
if (cpu_has_feature(CPU_FTR_DAWR1)) {
|
||||
host_dawr1 = mfspr(SPRN_DAWR1);
|
||||
host_dawrx1 = mfspr(SPRN_DAWRX1);
|
||||
}
|
||||
|
||||
/*
|
||||
* P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
|
||||
@ -3489,8 +3497,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
|
||||
mtspr(SPRN_SPURR, vcpu->arch.spurr);
|
||||
|
||||
if (dawr_enabled()) {
|
||||
mtspr(SPRN_DAWR0, vcpu->arch.dawr);
|
||||
mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
|
||||
mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
|
||||
mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
|
||||
if (cpu_has_feature(CPU_FTR_DAWR1)) {
|
||||
mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
|
||||
mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
|
||||
}
|
||||
}
|
||||
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
|
||||
mtspr(SPRN_IC, vcpu->arch.ic);
|
||||
@ -3542,8 +3554,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
|
||||
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
|
||||
mtspr(SPRN_HFSCR, host_hfscr);
|
||||
mtspr(SPRN_CIABR, host_ciabr);
|
||||
mtspr(SPRN_DAWR0, host_dawr);
|
||||
mtspr(SPRN_DAWRX0, host_dawrx);
|
||||
mtspr(SPRN_DAWR0, host_dawr0);
|
||||
mtspr(SPRN_DAWRX0, host_dawrx0);
|
||||
if (cpu_has_feature(CPU_FTR_DAWR1)) {
|
||||
mtspr(SPRN_DAWR1, host_dawr1);
|
||||
mtspr(SPRN_DAWRX1, host_dawrx1);
|
||||
}
|
||||
mtspr(SPRN_PID, host_pidr);
|
||||
|
||||
/*
|
||||
@ -3595,6 +3611,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
|
||||
unsigned long host_tidr = mfspr(SPRN_TIDR);
|
||||
unsigned long host_iamr = mfspr(SPRN_IAMR);
|
||||
unsigned long host_amr = mfspr(SPRN_AMR);
|
||||
unsigned long host_fscr = mfspr(SPRN_FSCR);
|
||||
s64 dec;
|
||||
u64 tb;
|
||||
int trap, save_pmu;
|
||||
@ -3735,6 +3752,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
|
||||
if (host_amr != vcpu->arch.amr)
|
||||
mtspr(SPRN_AMR, host_amr);
|
||||
|
||||
if (host_fscr != vcpu->arch.fscr)
|
||||
mtspr(SPRN_FSCR, host_fscr);
|
||||
|
||||
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
|
||||
store_fp_state(&vcpu->arch.fp);
|
||||
#ifdef CONFIG_ALTIVEC
|
||||
@ -4173,7 +4193,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
|
||||
|
||||
kvmppc_clear_host_core(pcpu);
|
||||
|
||||
local_paca->kvm_hstate.tid = 0;
|
||||
local_paca->kvm_hstate.napping = 0;
|
||||
local_paca->kvm_hstate.kvm_split_mode = NULL;
|
||||
kvmppc_start_thread(vcpu, vc);
|
||||
@ -4358,15 +4377,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
|
||||
|
||||
do {
|
||||
/*
|
||||
* The early POWER9 chips that can't mix radix and HPT threads
|
||||
* on the same core also need the workaround for the problem
|
||||
* where the TLB would prefetch entries in the guest exit path
|
||||
* for radix guests using the guest PIDR value and LPID 0.
|
||||
* The workaround is in the old path (kvmppc_run_vcpu())
|
||||
* but not the new path (kvmhv_run_single_vcpu()).
|
||||
* The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
|
||||
* path, which also handles hash and dependent threads mode.
|
||||
*/
|
||||
if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
|
||||
!no_mixing_hpt_and_radix)
|
||||
!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
|
||||
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
|
||||
vcpu->arch.vcore->lpcr);
|
||||
else
|
||||
@@ -5599,6 +5614,26 @@ out:
	return ret;
}

static int kvmhv_enable_dawr1(struct kvm *kvm)
{
	if (!cpu_has_feature(CPU_FTR_DAWR1))
		return -ENODEV;

	/* kvm == NULL means the caller is testing if the capability exists */
	if (kvm)
		kvm->arch.dawr1_enabled = true;
	return 0;
}

static bool kvmppc_hash_v3_possible(void)
{
	if (radix_enabled() && no_mixing_hpt_and_radix)
		return false;

	return cpu_has_feature(CPU_FTR_ARCH_300) &&
		cpu_has_feature(CPU_FTR_HVMODE);
}

static struct kvmppc_ops kvm_ops_hv = {
	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5642,6 +5677,8 @@ static struct kvmppc_ops kvm_ops_hv = {
	.store_to_eaddr = kvmhv_store_to_eaddr,
	.enable_svm = kvmhv_enable_svm,
	.svm_off = kvmhv_svm_off,
	.enable_dawr1 = kvmhv_enable_dawr1,
	.hash_v3_possible = kvmppc_hash_v3_possible,
};

static int kvm_init_subcore_bitmap(void)

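Since kvmhv_enable_dawr1() above is wired into the new KVM_CAP_PPC_DAWR1 capability, userspace has to opt in before a guest may use the second watchpoint. A hedged userspace sketch (QEMU-like, illustrative only; error handling trimmed):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_second_dawr(int vm_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_PPC_DAWR1 };

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_DAWR1) <= 0)
		return -1;	/* host lacks CPU_FTR_DAWR1 or HV KVM */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
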
@ -277,8 +277,7 @@ void kvmhv_commence_exit(int trap)
|
||||
struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
|
||||
int ptid = local_paca->kvm_hstate.ptid;
|
||||
struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
|
||||
int me, ee, i, t;
|
||||
int cpu0;
|
||||
int me, ee, i;
|
||||
|
||||
/* Set our bit in the threads-exiting-guest map in the 0xff00
|
||||
bits of vcore->entry_exit_map */
|
||||
@ -320,22 +319,6 @@ void kvmhv_commence_exit(int trap)
|
||||
if ((ee >> 8) == 0)
|
||||
kvmhv_interrupt_vcore(vc, ee);
|
||||
}
|
||||
|
||||
/*
|
||||
* On POWER9 when running a HPT guest on a radix host (sip != NULL),
|
||||
* we have to interrupt inactive CPU threads to get them to
|
||||
* restore the host LPCR value.
|
||||
*/
|
||||
if (sip->lpcr_req) {
|
||||
if (cmpxchg(&sip->do_restore, 0, 1) == 0) {
|
||||
vc = local_paca->kvm_hstate.kvm_vcore;
|
||||
cpu0 = vc->pcpu + ptid - local_paca->kvm_hstate.tid;
|
||||
for (t = 1; t < threads_per_core; ++t) {
|
||||
if (sip->napped[t])
|
||||
kvmhv_rm_send_ipi(cpu0 + t);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
|
||||
@ -667,95 +650,6 @@ void kvmppc_bad_interrupt(struct pt_regs *regs)
|
||||
panic("Bad KVM trap");
|
||||
}
|
||||
|
||||
/*
|
||||
* Functions used to switch LPCR HR and UPRT bits on all threads
|
||||
* when entering and exiting HPT guests on a radix host.
|
||||
*/
|
||||
|
||||
#define PHASE_REALMODE 1 /* in real mode */
|
||||
#define PHASE_SET_LPCR 2 /* have set LPCR */
|
||||
#define PHASE_OUT_OF_GUEST 4 /* have finished executing in guest */
|
||||
#define PHASE_RESET_LPCR 8 /* have reset LPCR to host value */
|
||||
|
||||
#define ALL(p) (((p) << 24) | ((p) << 16) | ((p) << 8) | (p))
|
||||
|
||||
static void wait_for_sync(struct kvm_split_mode *sip, int phase)
|
||||
{
|
||||
int thr = local_paca->kvm_hstate.tid;
|
||||
|
||||
sip->lpcr_sync.phase[thr] |= phase;
|
||||
phase = ALL(phase);
|
||||
while ((sip->lpcr_sync.allphases & phase) != phase) {
|
||||
HMT_low();
|
||||
barrier();
|
||||
}
|
||||
HMT_medium();
|
||||
}
|
||||
|
||||
void kvmhv_p9_set_lpcr(struct kvm_split_mode *sip)
|
||||
{
|
||||
int num_sets;
|
||||
unsigned long rb, set;
|
||||
|
||||
/* wait for every other thread to get to real mode */
|
||||
wait_for_sync(sip, PHASE_REALMODE);
|
||||
|
||||
/* Set LPCR and LPIDR */
|
||||
mtspr(SPRN_LPCR, sip->lpcr_req);
|
||||
mtspr(SPRN_LPID, sip->lpidr_req);
|
||||
isync();
|
||||
|
||||
/*
|
||||
* P10 will flush all the congruence class with a single tlbiel
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_31))
|
||||
num_sets = 1;
|
||||
else
|
||||
num_sets = POWER9_TLB_SETS_RADIX;
|
||||
|
||||
/* Invalidate the TLB on thread 0 */
|
||||
if (local_paca->kvm_hstate.tid == 0) {
|
||||
sip->do_set = 0;
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
for (set = 0; set < num_sets; ++set) {
|
||||
rb = TLBIEL_INVAL_SET_LPID +
|
||||
(set << TLBIEL_INVAL_SET_SHIFT);
|
||||
asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : :
|
||||
"r" (rb), "r" (0));
|
||||
}
|
||||
asm volatile("ptesync" : : : "memory");
|
||||
}
|
||||
|
||||
/* indicate that we have done so and wait for others */
|
||||
wait_for_sync(sip, PHASE_SET_LPCR);
|
||||
/* order read of sip->lpcr_sync.allphases vs. sip->do_set */
|
||||
smp_rmb();
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when a thread that has been in the guest needs
|
||||
* to reload the host LPCR value - but only on POWER9 when
|
||||
* running a HPT guest on a radix host.
|
||||
*/
|
||||
void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
|
||||
{
|
||||
/* we're out of the guest... */
|
||||
wait_for_sync(sip, PHASE_OUT_OF_GUEST);
|
||||
|
||||
mtspr(SPRN_LPID, 0);
|
||||
mtspr(SPRN_LPCR, sip->host_lpcr);
|
||||
isync();
|
||||
|
||||
if (local_paca->kvm_hstate.tid == 0) {
|
||||
sip->do_restore = 0;
|
||||
smp_wmb(); /* order store of do_restore vs. phase */
|
||||
}
|
||||
|
||||
wait_for_sync(sip, PHASE_RESET_LPCR);
|
||||
smp_mb();
|
||||
local_paca->kvm_hstate.kvm_split_mode = NULL;
|
||||
}
|
||||
|
||||
static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.ceded = 0;
|
||||
|
@ -33,8 +33,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
|
||||
hr->dpdes = vc->dpdes;
|
||||
hr->hfscr = vcpu->arch.hfscr;
|
||||
hr->tb_offset = vc->tb_offset;
|
||||
hr->dawr0 = vcpu->arch.dawr;
|
||||
hr->dawrx0 = vcpu->arch.dawrx;
|
||||
hr->dawr0 = vcpu->arch.dawr0;
|
||||
hr->dawrx0 = vcpu->arch.dawrx0;
|
||||
hr->ciabr = vcpu->arch.ciabr;
|
||||
hr->purr = vcpu->arch.purr;
|
||||
hr->spurr = vcpu->arch.spurr;
|
||||
@ -49,6 +49,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
|
||||
hr->pidr = vcpu->arch.pid;
|
||||
hr->cfar = vcpu->arch.cfar;
|
||||
hr->ppr = vcpu->arch.ppr;
|
||||
hr->dawr1 = vcpu->arch.dawr1;
|
||||
hr->dawrx1 = vcpu->arch.dawrx1;
|
||||
}
|
||||
|
||||
static void byteswap_pt_regs(struct pt_regs *regs)
|
||||
@ -91,6 +93,8 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
|
||||
hr->pidr = swab64(hr->pidr);
|
||||
hr->cfar = swab64(hr->cfar);
|
||||
hr->ppr = swab64(hr->ppr);
|
||||
hr->dawr1 = swab64(hr->dawr1);
|
||||
hr->dawrx1 = swab64(hr->dawrx1);
|
||||
}
|
||||
|
||||
static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
|
||||
@ -138,6 +142,7 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
|
||||
|
||||
/* Don't let data address watchpoint match in hypervisor state */
|
||||
hr->dawrx0 &= ~DAWRX_HYP;
|
||||
hr->dawrx1 &= ~DAWRX_HYP;
|
||||
|
||||
/* Don't let completed instruction address breakpt match in HV state */
|
||||
if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
|
||||
@ -151,8 +156,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
|
||||
vc->pcr = hr->pcr | PCR_MASK;
|
||||
vc->dpdes = hr->dpdes;
|
||||
vcpu->arch.hfscr = hr->hfscr;
|
||||
vcpu->arch.dawr = hr->dawr0;
|
||||
vcpu->arch.dawrx = hr->dawrx0;
|
||||
vcpu->arch.dawr0 = hr->dawr0;
|
||||
vcpu->arch.dawrx0 = hr->dawrx0;
|
||||
vcpu->arch.ciabr = hr->ciabr;
|
||||
vcpu->arch.purr = hr->purr;
|
||||
vcpu->arch.spurr = hr->spurr;
|
||||
@ -167,6 +172,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
|
||||
vcpu->arch.pid = hr->pidr;
|
||||
vcpu->arch.cfar = hr->cfar;
|
||||
vcpu->arch.ppr = hr->ppr;
|
||||
vcpu->arch.dawr1 = hr->dawr1;
|
||||
vcpu->arch.dawrx1 = hr->dawrx1;
|
||||
}
|
||||
|
||||
void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
|
||||
@ -215,12 +222,51 @@ static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
|
||||
}
|
||||
}
|
||||
|
||||
static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
|
||||
struct hv_guest_state *l2_hv,
|
||||
struct pt_regs *l2_regs,
|
||||
u64 hv_ptr, u64 regs_ptr)
|
||||
{
|
||||
int size;
|
||||
|
||||
if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version,
|
||||
sizeof(l2_hv->version)))
|
||||
return -1;
|
||||
|
||||
if (kvmppc_need_byteswap(vcpu))
|
||||
l2_hv->version = swab64(l2_hv->version);
|
||||
|
||||
size = hv_guest_state_size(l2_hv->version);
|
||||
if (size < 0)
|
||||
return -1;
|
||||
|
||||
return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
|
||||
kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
|
||||
sizeof(struct pt_regs));
|
||||
}
|
||||
|
||||
static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
|
||||
struct hv_guest_state *l2_hv,
|
||||
struct pt_regs *l2_regs,
|
||||
u64 hv_ptr, u64 regs_ptr)
|
||||
{
|
||||
int size;
|
||||
|
||||
size = hv_guest_state_size(l2_hv->version);
|
||||
if (size < 0)
|
||||
return -1;
|
||||
|
||||
return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
|
||||
kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
|
||||
sizeof(struct pt_regs));
|
||||
}
|
||||
|
||||
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
long int err, r;
|
||||
struct kvm_nested_guest *l2;
|
||||
struct pt_regs l2_regs, saved_l1_regs;
|
||||
struct hv_guest_state l2_hv, saved_l1_hv;
|
||||
struct hv_guest_state l2_hv = {0}, saved_l1_hv;
|
||||
struct kvmppc_vcore *vc = vcpu->arch.vcore;
|
||||
u64 hv_ptr, regs_ptr;
|
||||
u64 hdec_exp;
|
||||
@ -235,17 +281,15 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
|
||||
hv_ptr = kvmppc_get_gpr(vcpu, 4);
|
||||
regs_ptr = kvmppc_get_gpr(vcpu, 5);
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
|
||||
sizeof(struct hv_guest_state)) ||
|
||||
kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
|
||||
sizeof(struct pt_regs));
|
||||
err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
|
||||
hv_ptr, regs_ptr);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
if (err)
|
||||
return H_PARAMETER;
|
||||
|
||||
if (kvmppc_need_byteswap(vcpu))
|
||||
byteswap_hv_regs(&l2_hv);
|
||||
if (l2_hv.version != HV_GUEST_STATE_VERSION)
|
||||
if (l2_hv.version > HV_GUEST_STATE_VERSION)
|
||||
return H_P2;
|
||||
|
||||
if (kvmppc_need_byteswap(vcpu))
|
||||
@ -325,10 +369,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
|
||||
byteswap_pt_regs(&l2_regs);
|
||||
}
|
||||
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
|
||||
err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
|
||||
sizeof(struct hv_guest_state)) ||
|
||||
kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
|
||||
sizeof(struct pt_regs));
|
||||
err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
|
||||
hv_ptr, regs_ptr);
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
|
||||
if (err)
|
||||
return H_AUTHORITY;
|
||||
|
@ -52,11 +52,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||
#define STACK_SLOT_PID (SFS-32)
|
||||
#define STACK_SLOT_IAMR (SFS-40)
|
||||
#define STACK_SLOT_CIABR (SFS-48)
|
||||
#define STACK_SLOT_DAWR (SFS-56)
|
||||
#define STACK_SLOT_DAWRX (SFS-64)
|
||||
#define STACK_SLOT_DAWR0 (SFS-56)
|
||||
#define STACK_SLOT_DAWRX0 (SFS-64)
|
||||
#define STACK_SLOT_HFSCR (SFS-72)
|
||||
#define STACK_SLOT_AMR (SFS-80)
|
||||
#define STACK_SLOT_UAMOR (SFS-88)
|
||||
#define STACK_SLOT_DAWR1 (SFS-96)
|
||||
#define STACK_SLOT_DAWRX1 (SFS-104)
|
||||
/* the following is used by the P9 short path */
|
||||
#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
|
||||
|
||||
@ -85,19 +87,6 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
|
||||
RFI_TO_KERNEL
|
||||
|
||||
kvmppc_call_hv_entry:
|
||||
BEGIN_FTR_SECTION
|
||||
/* On P9, do LPCR setting, if necessary */
|
||||
ld r3, HSTATE_SPLIT_MODE(r13)
|
||||
cmpdi r3, 0
|
||||
beq 46f
|
||||
lwz r4, KVM_SPLIT_DO_SET(r3)
|
||||
cmpwi r4, 0
|
||||
beq 46f
|
||||
bl kvmhv_p9_set_lpcr
|
||||
nop
|
||||
46:
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
|
||||
ld r4, HSTATE_KVM_VCPU(r13)
|
||||
bl kvmppc_hv_entry
|
||||
|
||||
@ -361,11 +350,11 @@ kvm_secondary_got_guest:
|
||||
LOAD_REG_ADDR(r6, decrementer_max)
|
||||
ld r6, 0(r6)
|
||||
mtspr SPRN_HDEC, r6
|
||||
BEGIN_FTR_SECTION
|
||||
/* and set per-LPAR registers, if doing dynamic micro-threading */
|
||||
ld r6, HSTATE_SPLIT_MODE(r13)
|
||||
cmpdi r6, 0
|
||||
beq 63f
|
||||
BEGIN_FTR_SECTION
|
||||
ld r0, KVM_SPLIT_RPR(r6)
|
||||
mtspr SPRN_RPR, r0
|
||||
ld r0, KVM_SPLIT_PMMAR(r6)
|
||||
@ -373,16 +362,7 @@ BEGIN_FTR_SECTION
|
||||
ld r0, KVM_SPLIT_LDBAR(r6)
|
||||
mtspr SPRN_LDBAR, r0
|
||||
isync
|
||||
FTR_SECTION_ELSE
|
||||
/* On P9 we use the split_info for coordinating LPCR changes */
|
||||
lwz r4, KVM_SPLIT_DO_SET(r6)
|
||||
cmpwi r4, 0
|
||||
beq 1f
|
||||
mr r3, r6
|
||||
bl kvmhv_p9_set_lpcr
|
||||
nop
|
||||
1:
|
||||
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||
63:
|
||||
/* Order load of vcpu after load of vcore */
|
||||
lwsync
|
||||
@ -452,19 +432,15 @@ kvm_no_guest:
|
||||
mtcr r5
|
||||
blr
|
||||
|
||||
53: HMT_LOW
|
||||
53:
|
||||
BEGIN_FTR_SECTION
|
||||
HMT_LOW
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
cmpdi r5, 0
|
||||
bne 60f
|
||||
ld r3, HSTATE_SPLIT_MODE(r13)
|
||||
cmpdi r3, 0
|
||||
beq kvm_no_guest
|
||||
lwz r0, KVM_SPLIT_DO_SET(r3)
|
||||
cmpwi r0, 0
|
||||
bne kvmhv_do_set
|
||||
lwz r0, KVM_SPLIT_DO_RESTORE(r3)
|
||||
cmpwi r0, 0
|
||||
bne kvmhv_do_restore
|
||||
lbz r0, KVM_SPLIT_DO_NAP(r3)
|
||||
cmpwi r0, 0
|
||||
beq kvm_no_guest
|
||||
@ -472,24 +448,19 @@ kvm_no_guest:
|
||||
b kvm_unsplit_nap
|
||||
60: HMT_MEDIUM
|
||||
b kvm_secondary_got_guest
|
||||
FTR_SECTION_ELSE
|
||||
HMT_LOW
|
||||
ld r5, HSTATE_KVM_VCORE(r13)
|
||||
cmpdi r5, 0
|
||||
beq kvm_no_guest
|
||||
HMT_MEDIUM
|
||||
b kvm_secondary_got_guest
|
||||
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
||||
|
||||
54: li r0, KVM_HWTHREAD_IN_KVM
|
||||
stb r0, HSTATE_HWTHREAD_STATE(r13)
|
||||
b kvm_no_guest
|
||||
|
||||
kvmhv_do_set:
|
||||
/* Set LPCR, LPIDR etc. on P9 */
|
||||
HMT_MEDIUM
|
||||
bl kvmhv_p9_set_lpcr
|
||||
nop
|
||||
b kvm_no_guest
|
||||
|
||||
kvmhv_do_restore:
|
||||
HMT_MEDIUM
|
||||
bl kvmhv_p9_restore_lpcr
|
||||
nop
|
||||
b kvm_no_guest
|
||||
|
||||
/*
|
||||
* Here the primary thread is trying to return the core to
|
||||
* whole-core mode, so we need to nap.
|
||||
@ -527,7 +498,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
/* Set kvm_split_mode.napped[tid] = 1 */
|
||||
ld r3, HSTATE_SPLIT_MODE(r13)
|
||||
li r0, 1
|
||||
lbz r4, HSTATE_TID(r13)
|
||||
lhz r4, PACAPACAINDEX(r13)
|
||||
clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
|
||||
addi r4, r4, KVM_SPLIT_NAPPED
|
||||
stbx r0, r3, r4
|
||||
/* Check the do_nap flag again after setting napped[] */
|
||||
@ -711,10 +683,16 @@ BEGIN_FTR_SECTION
|
||||
mfspr r7, SPRN_DAWRX0
|
||||
mfspr r8, SPRN_IAMR
|
||||
std r5, STACK_SLOT_CIABR(r1)
|
||||
std r6, STACK_SLOT_DAWR(r1)
|
||||
std r7, STACK_SLOT_DAWRX(r1)
|
||||
std r6, STACK_SLOT_DAWR0(r1)
|
||||
std r7, STACK_SLOT_DAWRX0(r1)
|
||||
std r8, STACK_SLOT_IAMR(r1)
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
BEGIN_FTR_SECTION
|
||||
mfspr r6, SPRN_DAWR1
|
||||
mfspr r7, SPRN_DAWRX1
|
||||
std r6, STACK_SLOT_DAWR1(r1)
|
||||
std r7, STACK_SLOT_DAWRX1(r1)
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
|
||||
|
||||
mfspr r5, SPRN_AMR
|
||||
std r5, STACK_SLOT_AMR(r1)
|
||||
@ -801,10 +779,16 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
|
||||
lbz r5, 0(r5)
|
||||
cmpdi r5, 0
|
||||
beq 1f
|
||||
ld r5, VCPU_DAWR(r4)
|
||||
ld r6, VCPU_DAWRX(r4)
|
||||
ld r5, VCPU_DAWR0(r4)
|
||||
ld r6, VCPU_DAWRX0(r4)
|
||||
mtspr SPRN_DAWR0, r5
|
||||
mtspr SPRN_DAWRX0, r6
|
||||
BEGIN_FTR_SECTION
|
||||
ld r5, VCPU_DAWR1(r4)
|
||||
ld r6, VCPU_DAWRX1(r4)
|
||||
mtspr SPRN_DAWR1, r5
|
||||
mtspr SPRN_DAWRX1, r6
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
|
||||
1:
|
||||
ld r7, VCPU_CIABR(r4)
|
||||
ld r8, VCPU_TAR(r4)
|
||||
@ -918,15 +902,19 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
||||
cmpdi r3, 512 /* 1 microsecond */
|
||||
blt hdec_soon
|
||||
|
||||
/* For hash guest, clear out and reload the SLB */
|
||||
ld r6, VCPU_KVM(r4)
|
||||
lbz r0, KVM_RADIX(r6)
|
||||
cmpwi r0, 0
|
||||
bne 9f
|
||||
|
||||
/* For hash guest, clear out and reload the SLB */
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
/* Radix host won't have populated the SLB, so no need to clear */
|
||||
li r6, 0
|
||||
slbmte r6, r6
|
||||
slbia
|
||||
PPC_SLBIA(6)
|
||||
ptesync
|
||||
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||
|
||||
/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
|
||||
lwz r5,VCPU_SLB_MAX(r4)
|
||||
@ -1187,6 +1175,20 @@ EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
|
||||
mr r4, r3
|
||||
b fast_guest_entry_c
|
||||
guest_exit_short_path:
|
||||
/*
|
||||
* Malicious or buggy radix guests may have inserted SLB entries
|
||||
* (only 0..3 because radix always runs with UPRT=1), so these must
|
||||
* be cleared here to avoid side-channels. slbmte is used rather
|
||||
* than slbia, as it won't clear cached translations.
|
||||
*/
|
||||
li r0,0
|
||||
slbmte r0,r0
|
||||
li r4,1
|
||||
slbmte r0,r4
|
||||
li r4,2
|
||||
slbmte r0,r4
|
||||
li r4,3
|
||||
slbmte r0,r4
|
||||
|
||||
li r0, KVM_GUEST_MODE_NONE
|
||||
stb r0, HSTATE_IN_GUEST(r13)
|
||||
@ -1499,7 +1501,7 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
|
||||
lbz r0, KVM_RADIX(r5)
|
||||
li r5, 0
|
||||
cmpwi r0, 0
|
||||
bne 3f /* for radix, save 0 entries */
|
||||
bne 0f /* for radix, save 0 entries */
|
||||
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
|
||||
mtctr r0
|
||||
li r6,0
|
||||
@ -1518,13 +1520,13 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
|
||||
/* Finally clear out the SLB */
|
||||
li r0,0
|
||||
slbmte r0,r0
|
||||
slbia
|
||||
PPC_SLBIA(6)
|
||||
ptesync
|
||||
3: stw r5,VCPU_SLB_MAX(r9)
|
||||
stw r5,VCPU_SLB_MAX(r9)
|
||||
|
||||
/* load host SLB entries */
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
b 0f
|
||||
b guest_bypass
|
||||
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
|
||||
ld r8,PACA_SLBSHADOWPTR(r13)
|
||||
|
||||
@ -1538,7 +1540,21 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
|
||||
slbmte r6,r5
|
||||
1: addi r8,r8,16
|
||||
.endr
|
||||
0:
|
||||
b guest_bypass
|
||||
|
||||
0: /*
|
||||
* Sanitise radix guest SLB, see guest_exit_short_path comment.
|
||||
* We clear vcpu->arch.slb_max to match earlier behaviour.
|
||||
*/
|
||||
li r0,0
|
||||
stw r0,VCPU_SLB_MAX(r9)
|
||||
slbmte r0,r0
|
||||
li r4,1
|
||||
slbmte r0,r4
|
||||
li r4,2
|
||||
slbmte r0,r4
|
||||
li r4,3
|
||||
slbmte r0,r4
|
||||
|
||||
guest_bypass:
|
||||
stw r12, STACK_SLOT_TRAP(r1)
|
||||
@ -1759,8 +1775,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
|
||||
/* Restore host values of some registers */
|
||||
BEGIN_FTR_SECTION
|
||||
ld r5, STACK_SLOT_CIABR(r1)
|
||||
ld r6, STACK_SLOT_DAWR(r1)
|
||||
ld r7, STACK_SLOT_DAWRX(r1)
|
||||
ld r6, STACK_SLOT_DAWR0(r1)
|
||||
ld r7, STACK_SLOT_DAWRX0(r1)
|
||||
mtspr SPRN_CIABR, r5
|
||||
/*
|
||||
* If the DAWR doesn't work, it's ok to write these here as
|
||||
@ -1769,6 +1785,12 @@ BEGIN_FTR_SECTION
|
||||
mtspr SPRN_DAWR0, r6
|
||||
mtspr SPRN_DAWRX0, r7
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
BEGIN_FTR_SECTION
|
||||
ld r6, STACK_SLOT_DAWR1(r1)
|
||||
ld r7, STACK_SLOT_DAWRX1(r1)
|
||||
mtspr SPRN_DAWR1, r6
|
||||
mtspr SPRN_DAWRX1, r7
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
|
||||
BEGIN_FTR_SECTION
|
||||
ld r5, STACK_SLOT_TID(r1)
|
||||
ld r6, STACK_SLOT_PSSCR(r1)
|
||||
@ -1938,24 +1960,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
19: lis r8,0x7fff /* MAX_INT@h */
|
||||
mtspr SPRN_HDEC,r8
|
||||
|
||||
16:
|
||||
BEGIN_FTR_SECTION
|
||||
/* On POWER9 with HPT-on-radix we need to wait for all other threads */
|
||||
ld r3, HSTATE_SPLIT_MODE(r13)
|
||||
cmpdi r3, 0
|
||||
beq 47f
|
||||
lwz r8, KVM_SPLIT_DO_RESTORE(r3)
|
||||
cmpwi r8, 0
|
||||
beq 47f
|
||||
bl kvmhv_p9_restore_lpcr
|
||||
nop
|
||||
b 48f
|
||||
47:
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
ld r8,KVM_HOST_LPCR(r4)
|
||||
16: ld r8,KVM_HOST_LPCR(r4)
|
||||
mtspr SPRN_LPCR,r8
|
||||
isync
|
||||
48:
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
|
||||
/* Finish timing, if we have a vcpu */
|
||||
ld r4, HSTATE_KVM_VCPU(r13)
|
||||
@ -2574,8 +2582,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
|
||||
rlwimi r5, r4, 2, DAWRX_WT
|
||||
clrrdi r4, r4, 3
|
||||
std r4, VCPU_DAWR(r3)
|
||||
std r5, VCPU_DAWRX(r3)
|
||||
std r4, VCPU_DAWR0(r3)
|
||||
std r5, VCPU_DAWRX0(r3)
|
||||
/*
|
||||
* If came in through the real mode hcall handler then it is necessary
|
||||
* to write the registers since the return path won't. Otherwise it is
|
||||
@ -2779,8 +2787,10 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
|
||||
beq kvm_end_cede
|
||||
cmpwi r0, NAPPING_NOVCPU
|
||||
beq kvm_novcpu_wakeup
|
||||
BEGIN_FTR_SECTION
|
||||
cmpwi r0, NAPPING_UNSPLIT
|
||||
beq kvm_unsplit_wakeup
|
||||
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||
twi 31,0,0 /* Nap state must not be zero */
|
||||
|
||||
33: mr r4, r3
|
||||
@ -3343,13 +3353,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
||||
mtspr SPRN_IAMR, r0
|
||||
mtspr SPRN_CIABR, r0
|
||||
mtspr SPRN_DAWRX0, r0
|
||||
BEGIN_FTR_SECTION
|
||||
mtspr SPRN_DAWRX1, r0
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
|
||||
|
||||
/* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
|
||||
slbmte r0, r0
|
||||
PPC_SLBIA(6)
|
||||
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
b 4f
|
||||
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
|
||||
|
||||
slbmte r0, r0
|
||||
slbia
|
||||
ptesync
|
||||
ld r8, PACA_SLBSHADOWPTR(r13)
|
||||
.rept SLB_NUM_BOLTED
|
||||
|
@ -698,7 +698,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
|
||||
|
||||
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
|
||||
r = 1;
|
||||
};
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -611,8 +611,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
r = !!(hv_enabled && radix_enabled());
|
||||
break;
|
||||
case KVM_CAP_PPC_MMU_HASH_V3:
|
||||
r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
|
||||
cpu_has_feature(CPU_FTR_HVMODE));
|
||||
r = !!(hv_enabled && kvmppc_hv_ops->hash_v3_possible &&
|
||||
kvmppc_hv_ops->hash_v3_possible());
|
||||
break;
|
||||
case KVM_CAP_PPC_NESTED_HV:
|
||||
r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
|
||||
@ -678,6 +678,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
r = hv_enabled && kvmppc_hv_ops->enable_svm &&
|
||||
!kvmppc_hv_ops->enable_svm(NULL);
|
||||
break;
|
||||
case KVM_CAP_PPC_DAWR1:
|
||||
r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
|
||||
!kvmppc_hv_ops->enable_dawr1(NULL));
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
r = 0;
|
||||
@ -2187,6 +2191,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
break;
|
||||
r = kvm->arch.kvm_ops->enable_svm(kvm);
|
||||
break;
|
||||
case KVM_CAP_PPC_DAWR1:
|
||||
r = -EINVAL;
|
||||
if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_dawr1)
|
||||
break;
|
||||
r = kvm->arch.kvm_ops->enable_dawr1(kvm);
|
||||
break;
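
From userspace this follows the standard VM-level capability dance. A minimal sketch, assuming only the generic KVM UAPI (KVM_CHECK_EXTENSION / KVM_ENABLE_CAP and struct kvm_enable_cap); the function name and error handling are illustrative:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

/* vm_fd is an already-created VM file descriptor. */
static int enable_second_dawr(int vm_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_PPC_DAWR1 };

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_DAWR1) <= 0) {
		fprintf(stderr, "second DAWR not supported by this host\n");
		return -1;
	}
	/* Opt this VM in to the second data watchpoint. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}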
|
||||
#endif
|
||||
default:
|
||||
r = -EINVAL;
|
||||
|
@ -28,7 +28,6 @@
|
||||
#define KVM_S390_BSCA_CPU_SLOTS 64
|
||||
#define KVM_S390_ESCA_CPU_SLOTS 248
|
||||
#define KVM_MAX_VCPUS 255
|
||||
#define KVM_USER_MEM_SLOTS 32
|
||||
|
||||
/*
|
||||
* These seem to be used for allocating ->chip in the routing table, which we
|
||||
|
@ -170,7 +170,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
|
||||
if (!(vma->vm_flags & VM_WRITE))
|
||||
goto out_unlock_mmap;
|
||||
|
||||
ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl);
|
||||
ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
|
||||
if (ret)
|
||||
goto out_unlock_mmap;
|
||||
|
||||
@ -311,7 +311,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
|
||||
if (!(vma->vm_flags & VM_WRITE))
|
||||
goto out_unlock_mmap;
|
||||
|
||||
ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl);
|
||||
ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
|
||||
if (ret)
|
||||
goto out_unlock_mmap;
|
||||
|
||||
|
@ -11,8 +11,8 @@
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/qrwlock.h>
|
||||
#include <asm/qspinlock.h>
|
||||
#include <asm/qrwlock.h>
|
||||
|
||||
#endif /* !(__ASSEMBLY__) */
|
||||
|
||||
|
@ -292,6 +292,7 @@
|
||||
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
|
||||
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
|
||||
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
|
||||
@ -335,6 +336,7 @@
|
||||
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
|
||||
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
|
||||
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
|
||||
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
|
||||
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
|
||||
|
arch/x86/include/asm/kvm-x86-ops.h (new file, 127 lines)
@@ -0,0 +1,127 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_NULL)
|
||||
BUILD_BUG_ON(1)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* KVM_X86_OP() and KVM_X86_OP_NULL() are used to help generate
|
||||
* "static_call()"s. They are also intended for use when defining
|
||||
* the vmx/svm kvm_x86_ops. KVM_X86_OP() can be used for those
|
||||
* functions that follow the [svm|vmx]_func_name convention.
|
||||
* KVM_X86_OP_NULL() can leave a NULL definition for the
|
||||
* case where there is no definition or a function name that
|
||||
* doesn't match the typical naming convention is supplied.
|
||||
*/
|
||||
KVM_X86_OP_NULL(hardware_enable)
|
||||
KVM_X86_OP_NULL(hardware_disable)
|
||||
KVM_X86_OP_NULL(hardware_unsetup)
|
||||
KVM_X86_OP_NULL(cpu_has_accelerated_tpr)
|
||||
KVM_X86_OP(has_emulated_msr)
|
||||
KVM_X86_OP(vcpu_after_set_cpuid)
|
||||
KVM_X86_OP(vm_init)
|
||||
KVM_X86_OP_NULL(vm_destroy)
|
||||
KVM_X86_OP(vcpu_create)
|
||||
KVM_X86_OP(vcpu_free)
|
||||
KVM_X86_OP(vcpu_reset)
|
||||
KVM_X86_OP(prepare_guest_switch)
|
||||
KVM_X86_OP(vcpu_load)
|
||||
KVM_X86_OP(vcpu_put)
|
||||
KVM_X86_OP(update_exception_bitmap)
|
||||
KVM_X86_OP(get_msr)
|
||||
KVM_X86_OP(set_msr)
|
||||
KVM_X86_OP(get_segment_base)
|
||||
KVM_X86_OP(get_segment)
|
||||
KVM_X86_OP(get_cpl)
|
||||
KVM_X86_OP(set_segment)
|
||||
KVM_X86_OP_NULL(get_cs_db_l_bits)
|
||||
KVM_X86_OP(set_cr0)
|
||||
KVM_X86_OP(is_valid_cr4)
|
||||
KVM_X86_OP(set_cr4)
|
||||
KVM_X86_OP(set_efer)
|
||||
KVM_X86_OP(get_idt)
|
||||
KVM_X86_OP(set_idt)
|
||||
KVM_X86_OP(get_gdt)
|
||||
KVM_X86_OP(set_gdt)
|
||||
KVM_X86_OP(sync_dirty_debug_regs)
|
||||
KVM_X86_OP(set_dr7)
|
||||
KVM_X86_OP(cache_reg)
|
||||
KVM_X86_OP(get_rflags)
|
||||
KVM_X86_OP(set_rflags)
|
||||
KVM_X86_OP(tlb_flush_all)
|
||||
KVM_X86_OP(tlb_flush_current)
|
||||
KVM_X86_OP_NULL(tlb_remote_flush)
|
||||
KVM_X86_OP_NULL(tlb_remote_flush_with_range)
|
||||
KVM_X86_OP(tlb_flush_gva)
|
||||
KVM_X86_OP(tlb_flush_guest)
|
||||
KVM_X86_OP(run)
|
||||
KVM_X86_OP_NULL(handle_exit)
|
||||
KVM_X86_OP_NULL(skip_emulated_instruction)
|
||||
KVM_X86_OP_NULL(update_emulated_instruction)
|
||||
KVM_X86_OP(set_interrupt_shadow)
|
||||
KVM_X86_OP(get_interrupt_shadow)
|
||||
KVM_X86_OP(patch_hypercall)
|
||||
KVM_X86_OP(set_irq)
|
||||
KVM_X86_OP(set_nmi)
|
||||
KVM_X86_OP(queue_exception)
|
||||
KVM_X86_OP(cancel_injection)
|
||||
KVM_X86_OP(interrupt_allowed)
|
||||
KVM_X86_OP(nmi_allowed)
|
||||
KVM_X86_OP(get_nmi_mask)
|
||||
KVM_X86_OP(set_nmi_mask)
|
||||
KVM_X86_OP(enable_nmi_window)
|
||||
KVM_X86_OP(enable_irq_window)
|
||||
KVM_X86_OP(update_cr8_intercept)
|
||||
KVM_X86_OP(check_apicv_inhibit_reasons)
|
||||
KVM_X86_OP_NULL(pre_update_apicv_exec_ctrl)
|
||||
KVM_X86_OP(refresh_apicv_exec_ctrl)
|
||||
KVM_X86_OP(hwapic_irr_update)
|
||||
KVM_X86_OP(hwapic_isr_update)
|
||||
KVM_X86_OP_NULL(guest_apic_has_interrupt)
|
||||
KVM_X86_OP(load_eoi_exitmap)
|
||||
KVM_X86_OP(set_virtual_apic_mode)
|
||||
KVM_X86_OP_NULL(set_apic_access_page_addr)
|
||||
KVM_X86_OP(deliver_posted_interrupt)
|
||||
KVM_X86_OP_NULL(sync_pir_to_irr)
|
||||
KVM_X86_OP(set_tss_addr)
|
||||
KVM_X86_OP(set_identity_map_addr)
|
||||
KVM_X86_OP(get_mt_mask)
|
||||
KVM_X86_OP(load_mmu_pgd)
|
||||
KVM_X86_OP_NULL(has_wbinvd_exit)
|
||||
KVM_X86_OP(write_l1_tsc_offset)
|
||||
KVM_X86_OP(get_exit_info)
|
||||
KVM_X86_OP(check_intercept)
|
||||
KVM_X86_OP(handle_exit_irqoff)
|
||||
KVM_X86_OP_NULL(request_immediate_exit)
|
||||
KVM_X86_OP(sched_in)
|
||||
KVM_X86_OP_NULL(slot_enable_log_dirty)
|
||||
KVM_X86_OP_NULL(slot_disable_log_dirty)
|
||||
KVM_X86_OP_NULL(flush_log_dirty)
|
||||
KVM_X86_OP_NULL(enable_log_dirty_pt_masked)
|
||||
KVM_X86_OP_NULL(cpu_dirty_log_size)
|
||||
KVM_X86_OP_NULL(pre_block)
|
||||
KVM_X86_OP_NULL(post_block)
|
||||
KVM_X86_OP_NULL(vcpu_blocking)
|
||||
KVM_X86_OP_NULL(vcpu_unblocking)
|
||||
KVM_X86_OP_NULL(update_pi_irte)
|
||||
KVM_X86_OP_NULL(apicv_post_state_restore)
|
||||
KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt)
|
||||
KVM_X86_OP_NULL(set_hv_timer)
|
||||
KVM_X86_OP_NULL(cancel_hv_timer)
|
||||
KVM_X86_OP(setup_mce)
|
||||
KVM_X86_OP(smi_allowed)
|
||||
KVM_X86_OP(pre_enter_smm)
|
||||
KVM_X86_OP(pre_leave_smm)
|
||||
KVM_X86_OP(enable_smi_window)
|
||||
KVM_X86_OP_NULL(mem_enc_op)
|
||||
KVM_X86_OP_NULL(mem_enc_reg_region)
|
||||
KVM_X86_OP_NULL(mem_enc_unreg_region)
|
||||
KVM_X86_OP(get_msr_feature)
|
||||
KVM_X86_OP(can_emulate_instruction)
|
||||
KVM_X86_OP(apic_init_signal_blocked)
|
||||
KVM_X86_OP_NULL(enable_direct_tlbflush)
|
||||
KVM_X86_OP_NULL(migrate_timers)
|
||||
KVM_X86_OP(msr_filter_changed)
|
||||
KVM_X86_OP_NULL(complete_emulated_msr)
|
||||
|
||||
#undef KVM_X86_OP
|
||||
#undef KVM_X86_OP_NULL
|
@ -40,10 +40,8 @@
|
||||
#define KVM_MAX_VCPUS 288
|
||||
#define KVM_SOFT_MAX_VCPUS 240
|
||||
#define KVM_MAX_VCPU_ID 1023
|
||||
#define KVM_USER_MEM_SLOTS 509
|
||||
/* memory slots that are not exposed to userspace */
|
||||
#define KVM_PRIVATE_MEM_SLOTS 3
|
||||
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
|
||||
|
||||
#define KVM_HALT_POLL_NS_DEFAULT 200000
|
||||
|
||||
@ -52,6 +50,9 @@
|
||||
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
|
||||
KVM_DIRTY_LOG_INITIALLY_SET)
|
||||
|
||||
#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \
|
||||
KVM_BUS_LOCK_DETECTION_EXIT)
|
||||
|
||||
/* x86-specific vcpu->requests bit members */
|
||||
#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
|
||||
#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
|
||||
@ -200,9 +201,17 @@ enum x86_intercept_stage;
|
||||
#define DR6_BS (1 << 14)
|
||||
#define DR6_BT (1 << 15)
|
||||
#define DR6_RTM (1 << 16)
|
||||
#define DR6_FIXED_1	0xfffe0ff0
#define DR6_INIT	0xffff0ff0
/*
 * DR6_ACTIVE_LOW combines fixed-1 and active-low bits.
 * We can regard all the bits in DR6_FIXED_1 as active_low bits;
 * they will never be 0 for now, but when they are defined
 * in the future it will require no code change.
 *
 * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
 */
#define DR6_ACTIVE_LOW	0xffff0ff0
#define DR6_VOLATILE	0x0001e00f
#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)
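
The new macro reproduces the old literal, and the emulator now ORs DR6_ACTIVE_LOW into synthesized DR6 values (see the check_dr_read() hunk later in this diff). A standalone sketch that only checks the arithmetic; DR6_BD is bit 13 as defined elsewhere in this header:

#include <stdint.h>
#include <assert.h>

#define DR6_BD		(1ULL << 13)
#define DR6_ACTIVE_LOW	0xffff0ff0ULL
#define DR6_VOLATILE	0x0001e00fULL
#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)

int main(void)
{
	/* The derived value matches the old literal 0xfffe0ff0. */
	assert(DR6_FIXED_1 == 0xfffe0ff0ULL);

	/* How a #DB for a debug-register access is now reported. */
	uint64_t dr6 = DR6_BD | DR6_ACTIVE_LOW;
	assert((dr6 & DR6_FIXED_1) == DR6_FIXED_1);
	return 0;
}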
|
||||
|
||||
#define DR7_BP_EN_MASK 0x000000ff
|
||||
#define DR7_GE (1 << 9)
|
||||
@ -337,6 +346,8 @@ struct kvm_mmu_root_info {
|
||||
|
||||
#define KVM_MMU_NUM_PREV_ROOTS 3
|
||||
|
||||
#define KVM_HAVE_MMU_RWLOCK
|
||||
|
||||
struct kvm_mmu_page;
|
||||
|
||||
/*
|
||||
@ -358,8 +369,6 @@ struct kvm_mmu {
|
||||
int (*sync_page)(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *sp);
|
||||
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
|
||||
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
|
||||
u64 *spte, const void *pte);
|
||||
hpa_t root_hpa;
|
||||
gpa_t root_pgd;
|
||||
union kvm_mmu_role mmu_role;
|
||||
@ -510,6 +519,7 @@ struct kvm_vcpu_hv_synic {
|
||||
|
||||
/* Hyper-V per vcpu emulation context */
|
||||
struct kvm_vcpu_hv {
|
||||
struct kvm_vcpu *vcpu;
|
||||
u32 vp_index;
|
||||
u64 hv_vapic;
|
||||
s64 runtime_offset;
|
||||
@ -520,6 +530,15 @@ struct kvm_vcpu_hv {
|
||||
cpumask_t tlb_flush;
|
||||
};
|
||||
|
||||
/* Xen HVM per vcpu emulation context */
|
||||
struct kvm_vcpu_xen {
|
||||
u64 hypercall_rip;
|
||||
bool vcpu_info_set;
|
||||
bool vcpu_time_info_set;
|
||||
struct gfn_to_hva_cache vcpu_info_cache;
|
||||
struct gfn_to_hva_cache vcpu_time_info_cache;
|
||||
};
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
/*
|
||||
* rip and regs accesses must go through
|
||||
@ -640,7 +659,7 @@ struct kvm_vcpu_arch {
|
||||
int cpuid_nent;
|
||||
struct kvm_cpuid_entry2 *cpuid_entries;
|
||||
|
||||
unsigned long cr3_lm_rsvd_bits;
|
||||
u64 reserved_gpa_bits;
|
||||
int maxphyaddr;
|
||||
int max_tdp_level;
|
||||
|
||||
@ -717,7 +736,9 @@ struct kvm_vcpu_arch {
|
||||
/* used for guest single stepping over the given code position */
|
||||
unsigned long singlestep_rip;
|
||||
|
||||
struct kvm_vcpu_hv hyperv;
|
||||
bool hyperv_enabled;
|
||||
struct kvm_vcpu_hv *hyperv;
|
||||
struct kvm_vcpu_xen xen;
|
||||
|
||||
cpumask_var_t wbinvd_dirty_mask;
|
||||
|
||||
@ -888,6 +909,14 @@ struct msr_bitmap_range {
|
||||
unsigned long *bitmap;
|
||||
};
|
||||
|
||||
/* Xen emulation context */
|
||||
struct kvm_xen {
|
||||
bool long_mode;
|
||||
bool shinfo_set;
|
||||
u8 upcall_vector;
|
||||
struct gfn_to_hva_cache shinfo_cache;
|
||||
};
|
||||
|
||||
enum kvm_irqchip_mode {
|
||||
KVM_IRQCHIP_NONE,
|
||||
KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
|
||||
@ -967,6 +996,7 @@ struct kvm_arch {
|
||||
struct hlist_head mask_notifier_list;
|
||||
|
||||
struct kvm_hv hyperv;
|
||||
struct kvm_xen xen;
|
||||
|
||||
#ifdef CONFIG_KVM_MMU_AUDIT
|
||||
int audit_point;
|
||||
@ -998,9 +1028,12 @@ struct kvm_arch {
|
||||
struct msr_bitmap_range ranges[16];
|
||||
} msr_filter;
|
||||
|
||||
bool bus_lock_detection_enabled;
|
||||
|
||||
struct kvm_pmu_event_filter *pmu_event_filter;
|
||||
struct task_struct *nx_lpage_recovery_thread;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Whether the TDP MMU is enabled for this VM. This contains a
|
||||
* snapshot of the TDP MMU module parameter from when the VM was
|
||||
@ -1026,12 +1059,25 @@ struct kvm_arch {
|
||||
* tdp_mmu_page set and a root_count of 0.
|
||||
*/
|
||||
	struct list_head tdp_mmu_pages;

	/*
	 * Protects accesses to the following fields when the MMU lock
	 * is held in read mode:
	 *  - tdp_mmu_pages (above)
	 *  - the link field of struct kvm_mmu_pages used by the TDP MMU
	 *  - lpage_disallowed_mmu_pages
	 *  - the lpage_disallowed_link field of struct kvm_mmu_pages used
	 *    by the TDP MMU
	 * It is acceptable, but not necessary, to acquire this lock when
	 * the thread holds the MMU lock in write mode.
	 */
	spinlock_t tdp_mmu_pages_lock;
#endif /* CONFIG_X86_64 */
};
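
A minimal sketch of the rule that comment states, using a hypothetical helper (the name and the "shared" flag are illustrative, not taken from this diff): a path holding kvm->mmu_lock only for read must also take tdp_mmu_pages_lock before touching the protected lists, while a write-mode holder may rely on the rwlock alone.

static void example_tdp_mmu_unlink_page(struct kvm *kvm,
					struct kvm_mmu_page *sp,
					bool shared)
{
	if (shared)
		/* mmu_lock held for read: serialize against other readers. */
		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
	else
		lockdep_assert_held_write(&kvm->mmu_lock);

	list_del(&sp->link);

	if (shared)
		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}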
|
||||
|
||||
struct kvm_vm_stat {
|
||||
ulong mmu_shadow_zapped;
|
||||
ulong mmu_pte_write;
|
||||
ulong mmu_pte_updated;
|
||||
ulong mmu_pde_zapped;
|
||||
ulong mmu_flooded;
|
||||
ulong mmu_recycled;
|
||||
@ -1340,6 +1386,19 @@ extern u64 __read_mostly host_efer;
|
||||
extern bool __read_mostly allow_smaller_maxphyaddr;
|
||||
extern struct kvm_x86_ops kvm_x86_ops;

#define KVM_X86_OP(func) \
	DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
#define KVM_X86_OP_NULL KVM_X86_OP
#include <asm/kvm-x86-ops.h>

static inline void kvm_ops_static_call_update(void)
{
#define KVM_X86_OP(func) \
	static_call_update(kvm_x86_##func, kvm_x86_ops.func);
#define KVM_X86_OP_NULL KVM_X86_OP
#include <asm/kvm-x86-ops.h>
}
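
Call sites then dispatch through the generated static calls instead of the kvm_x86_ops function-pointer table; both idioms appear later in this diff (kvm_arch_flush_remote_tlb, kvm_arch_vcpu_blocking). A condensed sketch:

/* Mandatory op (KVM_X86_OP): the call always has a target. */
static inline int example_get_cpl(struct kvm_vcpu *vcpu)
{
	return static_call(kvm_x86_get_cpl)(vcpu);
}

/* Optional op (KVM_X86_OP_NULL): static_call_cond() is a no-op when unset. */
static inline void example_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
}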
|
||||
|
||||
#define __KVM_HAVE_ARCH_VM_ALLOC
|
||||
static inline struct kvm *kvm_arch_alloc_vm(void)
|
||||
{
|
||||
@ -1351,7 +1410,7 @@ void kvm_arch_free_vm(struct kvm *kvm);
|
||||
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
|
||||
{
|
||||
if (kvm_x86_ops.tlb_remote_flush &&
|
||||
!kvm_x86_ops.tlb_remote_flush(kvm))
|
||||
!static_call(kvm_x86_tlb_remote_flush)(kvm))
|
||||
return 0;
|
||||
else
|
||||
return -ENOTSUPP;
|
||||
@ -1421,6 +1480,8 @@ extern u8 kvm_tsc_scaling_ratio_frac_bits;
|
||||
extern u64 kvm_max_tsc_scaling_ratio;
|
||||
/* 1ull << kvm_tsc_scaling_ratio_frac_bits */
|
||||
extern u64 kvm_default_tsc_scaling_ratio;
|
||||
/* bus lock detection supported? */
|
||||
extern bool kvm_has_bus_lock_exit;
|
||||
|
||||
extern u64 kvm_mce_cap_supported;
|
||||
|
||||
@ -1501,7 +1562,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
|
||||
int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
|
||||
int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
|
||||
int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
|
||||
void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
|
||||
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
|
||||
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
|
||||
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
|
||||
@ -1742,14 +1803,12 @@ static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
|
||||
|
||||
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_x86_ops.vcpu_blocking)
|
||||
kvm_x86_ops.vcpu_blocking(vcpu);
|
||||
static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
|
||||
}
|
||||
|
||||
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_x86_ops.vcpu_unblocking)
|
||||
kvm_x86_ops.vcpu_unblocking(vcpu);
|
||||
static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
|
||||
}
|
||||
|
||||
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
|
||||
|
@ -30,16 +30,29 @@ static inline int cpu_has_vmx(void)
|
||||
}
|
||||
|
||||
|
||||
/** Disable VMX on the current CPU
|
||||
/**
|
||||
* cpu_vmxoff() - Disable VMX on the current CPU
|
||||
*
|
||||
* vmxoff causes a undefined-opcode exception if vmxon was not run
|
||||
* on the CPU previously. Only call this function if you know VMX
|
||||
* is enabled.
|
||||
* Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
|
||||
*
|
||||
* Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
|
||||
* atomically track post-VMXON state, e.g. this may be called in NMI context.
|
||||
* Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
|
||||
* faults are guaranteed to be due to the !post-VMXON check unless the CPU is
|
||||
* magically in RM, VM86, compat mode, or at CPL>0.
|
||||
*/
|
||||
static inline void cpu_vmxoff(void)
|
||||
static inline int cpu_vmxoff(void)
|
||||
{
|
||||
asm volatile ("vmxoff");
|
||||
asm_volatile_goto("1: vmxoff\n\t"
|
||||
_ASM_EXTABLE(1b, %l[fault])
|
||||
::: "cc", "memory" : fault);
|
||||
|
||||
cr4_clear_bits(X86_CR4_VMXE);
|
||||
return 0;
|
||||
|
||||
fault:
|
||||
cr4_clear_bits(X86_CR4_VMXE);
|
||||
return -EIO;
|
||||
}
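
A short sketch of how a caller can treat the new return value; the emergency paths below only need CR4.VMXE to end up clear, so this caller is illustrative rather than taken from the series:

static void example_force_vmxoff(void)
{
	/*
	 * 0    -> VMXOFF executed; this CPU really was post-VMXON.
	 * -EIO -> VMXOFF faulted (#UD eaten); the CPU was not in VMX operation.
	 * CR4.VMXE is cleared on both paths.
	 */
	if (cpu_vmxoff())
		pr_debug("cpu_vmxoff: CPU was not post-VMXON\n");
}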
|
||||
|
||||
static inline int cpu_vmx_enabled(void)
|
||||
|
@ -73,6 +73,7 @@
|
||||
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
|
||||
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
|
||||
#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
|
||||
#define SECONDARY_EXEC_BUS_LOCK_DETECTION VMCS_CONTROL_BIT(BUS_LOCK_DETECTION)
|
||||
|
||||
#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
|
||||
#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
|
||||
|
@ -83,5 +83,6 @@
|
||||
#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
|
||||
#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
|
||||
#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
|
||||
#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */
|
||||
|
||||
#endif /* _ASM_X86_VMXFEATURES_H */
|
||||
|
@ -182,6 +182,9 @@ struct arch_shared_info {
|
||||
unsigned long p2m_cr3; /* cr3 value of the p2m address space */
|
||||
unsigned long p2m_vaddr; /* virtual address of the p2m list */
|
||||
unsigned long p2m_generation; /* generation count of p2m mapping */
|
||||
#ifdef CONFIG_X86_32
|
||||
uint32_t wc_sec_hi;
|
||||
#endif
|
||||
};
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
|
@ -112,6 +112,7 @@ struct kvm_ioapic_state {
|
||||
#define KVM_NR_IRQCHIPS 3
|
||||
|
||||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
#define KVM_RUN_X86_BUS_LOCK (1 << 1)
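
A userspace sketch of consuming the new flag. KVM_RUN_X86_BUS_LOCK is the only name visible in this hunk; the capability and mode constants (KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT) are assumed from the rest of this series:

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

static void run_with_bus_lock_exits(int vm_fd, int vcpu_fd, struct kvm_run *run)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_X86_BUS_LOCK_EXIT,		/* assumed name */
		.args = { KVM_BUS_LOCK_DETECTION_EXIT },	/* assumed name */
	};

	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);

	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
		if (run->flags & KVM_RUN_X86_BUS_LOCK)
			fprintf(stderr, "guest executed a bus-locking instruction\n");
		/* ...then dispatch run->exit_reason as usual... */
	}
}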
|
||||
|
||||
/* for KVM_GET_REGS and KVM_SET_REGS */
|
||||
struct kvm_regs {
|
||||
|
@ -89,6 +89,7 @@
|
||||
#define EXIT_REASON_XRSTORS 64
|
||||
#define EXIT_REASON_UMWAIT 67
|
||||
#define EXIT_REASON_TPAUSE 68
|
||||
#define EXIT_REASON_BUS_LOCK 74
|
||||
|
||||
#define VMX_EXIT_REASONS \
|
||||
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
|
||||
@ -150,7 +151,8 @@
|
||||
{ EXIT_REASON_XSAVES, "XSAVES" }, \
|
||||
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
|
||||
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
|
||||
{ EXIT_REASON_TPAUSE, "TPAUSE" }
|
||||
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
|
||||
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }
|
||||
|
||||
#define VMX_EXIT_REASON_FLAGS \
|
||||
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
|
||||
|
@ -1747,6 +1747,7 @@ void apic_ap_setup(void)
|
||||
|
||||
#ifdef CONFIG_X86_X2APIC
|
||||
int x2apic_mode;
|
||||
EXPORT_SYMBOL_GPL(x2apic_mode);
|
||||
|
||||
enum {
|
||||
X2APIC_OFF,
|
||||
|
@ -547,31 +547,21 @@ static void emergency_vmx_disable_all(void)
|
||||
local_irq_disable();
|
||||
|
||||
/*
|
||||
* We need to disable VMX on all CPUs before rebooting, otherwise
|
||||
* we risk hanging up the machine, because the CPU ignores INIT
|
||||
* signals when VMX is enabled.
|
||||
* Disable VMX on all CPUs before rebooting, otherwise we risk hanging
|
||||
* the machine, because the CPU blocks INIT when it's in VMX root.
|
||||
*
|
||||
* We can't take any locks and we may be on an inconsistent
|
||||
* state, so we use NMIs as IPIs to tell the other CPUs to disable
|
||||
* VMX and halt.
|
||||
* We can't take any locks and we may be on an inconsistent state, so
|
||||
* use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
|
||||
*
|
||||
* For safety, we will avoid running the nmi_shootdown_cpus()
|
||||
* stuff unnecessarily, but we don't have a way to check
|
||||
* if other CPUs have VMX enabled. So we will call it only if the
|
||||
* CPU we are running on has VMX enabled.
|
||||
*
|
||||
* We will miss cases where VMX is not enabled on all CPUs. This
|
||||
* shouldn't do much harm because KVM always enable VMX on all
|
||||
* CPUs anyway. But we can miss it on the small window where KVM
|
||||
* is still enabling VMX.
|
||||
* Do the NMI shootdown even if VMX is off on _this_ CPU, as that
|
||||
* doesn't prevent a different CPU from being in VMX root operation.
|
||||
*/
|
||||
if (cpu_has_vmx() && cpu_vmx_enabled()) {
|
||||
/* Disable VMX on this CPU. */
|
||||
cpu_vmxoff();
|
||||
if (cpu_has_vmx()) {
|
||||
/* Safely force _this_ CPU out of VMX root operation. */
|
||||
__cpu_emergency_vmxoff();
|
||||
|
||||
/* Halt and disable VMX on the other CPUs */
|
||||
/* Halt and exit VMX root operation on the other CPUs. */
|
||||
nmi_shootdown_cpus(vmxoff_nmi);
|
||||
|
||||
}
|
||||
}
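
__cpu_emergency_vmxoff() is not part of this excerpt; a sketch of what it presumably amounts to, built only from the helpers visible in virtext.h above (treat this as an assumption, not the actual definition):

static inline void example_cpu_emergency_vmxoff(void)
{
	/* Only attempt VMXOFF if CR4.VMXE says this CPU ever ran VMXON. */
	if (cpu_vmx_enabled())
		cpu_vmxoff();
}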
|
||||
|
||||
|
@ -14,10 +14,11 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
|
||||
$(KVM)/dirty_ring.o
|
||||
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
|
||||
|
||||
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
|
||||
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o xen.o \
|
||||
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
|
||||
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
|
||||
mmu/spte.o mmu/tdp_iter.o mmu/tdp_mmu.o
|
||||
mmu/spte.o
|
||||
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
|
||||
|
||||
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
|
||||
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
|
||||
|
@ -173,16 +173,22 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
kvm_update_pv_runtime(vcpu);
|
||||
|
||||
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
|
||||
|
||||
kvm_pmu_refresh(vcpu);
|
||||
vcpu->arch.cr4_guest_rsvd_bits =
|
||||
__cr4_reserved_bits(guest_cpuid_has, vcpu);
|
||||
|
||||
vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
|
||||
kvm_hv_set_cpuid(vcpu);
|
||||
|
||||
/* Invoke the vendor callback only after the above state is updated. */
|
||||
kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
|
||||
static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
|
||||
|
||||
/*
|
||||
* Except for the MMU, which needs to be reset after any vendor
|
||||
* specific adjustments to the reserved GPA bits.
|
||||
*/
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
}
|
||||
|
||||
static int is_efer_nx(void)
|
||||
@ -223,6 +229,16 @@ not_found:
|
||||
return 36;
|
||||
}
|
||||
|
||||
/*
|
||||
* This "raw" version returns the reserved GPA bits without any adjustments for
|
||||
* encryption technologies that usurp bits. The raw mask should be used if and
|
||||
* only if hardware does _not_ strip the usurped bits, e.g. in virtual MTRRs.
|
||||
*/
|
||||
u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
|
||||
}
|
||||
|
||||
/* when an old userspace process fills a new kernel module */
|
||||
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
|
||||
struct kvm_cpuid *cpuid,
|
||||
@ -434,7 +450,7 @@ void kvm_set_cpu_caps(void)
|
||||
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
|
||||
|
||||
kvm_cpu_cap_mask(CPUID_7_1_EAX,
|
||||
F(AVX512_BF16)
|
||||
F(AVX_VNNI) | F(AVX512_BF16)
|
||||
);
|
||||
|
||||
kvm_cpu_cap_mask(CPUID_D_1_EAX,
|
||||
|
@ -30,15 +30,32 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
|
||||
u32 *ecx, u32 *edx, bool exact_only);
|
||||
|
||||
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
|
||||
u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.maxphyaddr;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_is_legal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
|
||||
{
|
||||
return !(gpa & vcpu->arch.reserved_gpa_bits);
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
|
||||
{
|
||||
return (gpa >= BIT_ULL(cpuid_maxphyaddr(vcpu)));
|
||||
return !kvm_vcpu_is_legal_gpa(vcpu, gpa);
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_is_legal_aligned_gpa(struct kvm_vcpu *vcpu,
|
||||
gpa_t gpa, gpa_t alignment)
|
||||
{
|
||||
return IS_ALIGNED(gpa, alignment) && kvm_vcpu_is_legal_gpa(vcpu, gpa);
|
||||
}
|
||||
|
||||
static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
|
||||
{
|
||||
return kvm_vcpu_is_legal_aligned_gpa(vcpu, gpa, PAGE_SIZE);
|
||||
}
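
Numerically, reserved_gpa_bits is just the mask of physical-address bits above MAXPHYADDR (before any encryption adjustment, per the "raw" helper earlier in this diff). A small sketch, assuming rsvd_bits(s, e) builds the inclusive mask of bits s..e as elsewhere in KVM:

#include <stdint.h>
#include <assert.h>

/* Same shape as KVM's rsvd_bits(); assumed, not copied from this diff. */
static inline uint64_t rsvd_bits(int s, int e)
{
	return ((2ULL << (e - s)) - 1) << s;
}

int main(void)
{
	int maxphyaddr = 48;			/* typical modern CPU */
	uint64_t reserved = rsvd_bits(maxphyaddr, 63);

	assert(reserved == 0xffff000000000000ULL);

	/* kvm_vcpu_is_legal_gpa(): legal iff no reserved bit is set. */
	assert(!(0x0000123456789000ULL & reserved));	/* legal   */
	assert(  0x0100000000000000ULL & reserved);	/* illegal */
	return 0;
}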
|
||||
|
||||
struct cpuid_reg {
|
||||
@ -324,11 +341,6 @@ static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature)
|
||||
kvm_cpu_cap_set(x86_feature);
|
||||
}
|
||||
|
||||
static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
|
||||
{
|
||||
return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
|
||||
}
|
||||
|
||||
static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
|
||||
unsigned int kvm_feature)
|
||||
{
|
||||
|
@ -2506,12 +2506,12 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
|
||||
|
||||
val = GET_SMSTATE(u32, smstate, 0x7fcc);
|
||||
|
||||
if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
|
||||
if (ctxt->ops->set_dr(ctxt, 6, val))
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
|
||||
val = GET_SMSTATE(u32, smstate, 0x7fc8);
|
||||
|
||||
if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
|
||||
if (ctxt->ops->set_dr(ctxt, 7, val))
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
|
||||
selector = GET_SMSTATE(u32, smstate, 0x7fc4);
|
||||
@ -2564,14 +2564,14 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
|
||||
ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
|
||||
ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
|
||||
|
||||
val = GET_SMSTATE(u32, smstate, 0x7f68);
|
||||
val = GET_SMSTATE(u64, smstate, 0x7f68);
|
||||
|
||||
if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1))
|
||||
if (ctxt->ops->set_dr(ctxt, 6, val))
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
|
||||
val = GET_SMSTATE(u32, smstate, 0x7f60);
|
||||
val = GET_SMSTATE(u64, smstate, 0x7f60);
|
||||
|
||||
if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1))
|
||||
if (ctxt->ops->set_dr(ctxt, 7, val))
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
|
||||
cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
|
||||
@ -4329,7 +4329,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
|
||||
|
||||
ctxt->ops->get_dr(ctxt, 6, &dr6);
|
||||
dr6 &= ~DR_TRAP_BITS;
|
||||
dr6 |= DR6_BD | DR6_RTM;
|
||||
dr6 |= DR6_BD | DR6_ACTIVE_LOW;
|
||||
ctxt->ops->set_dr(ctxt, 6, dr6);
|
||||
return emulate_db(ctxt);
|
||||
}
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "ioapic.h"
|
||||
#include "cpuid.h"
|
||||
#include "hyperv.h"
|
||||
#include "xen.h"
|
||||
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/kvm_host.h>
|
||||
@ -36,6 +37,9 @@
|
||||
#include "trace.h"
|
||||
#include "irq.h"
|
||||
|
||||
/* "Hv#1" signature */
|
||||
#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
|
||||
|
||||
#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
|
||||
|
||||
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
|
||||
@ -128,7 +132,7 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
|
||||
synic_update_vector(synic, vector);
|
||||
|
||||
/* Load SynIC vectors into EOI exit bitmap */
|
||||
kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
|
||||
kvm_make_request(KVM_REQ_SCAN_IOAPIC, hv_synic_to_vcpu(synic));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -141,10 +145,10 @@ static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
|
||||
return NULL;
|
||||
|
||||
vcpu = kvm_get_vcpu(kvm, vpidx);
|
||||
if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
|
||||
if (vcpu && kvm_hv_get_vpindex(vcpu) == vpidx)
|
||||
return vcpu;
|
||||
kvm_for_each_vcpu(i, vcpu, kvm)
|
||||
if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
|
||||
if (kvm_hv_get_vpindex(vcpu) == vpidx)
|
||||
return vcpu;
|
||||
return NULL;
|
||||
}
|
||||
@ -157,15 +161,15 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
|
||||
vcpu = get_vcpu_by_vpidx(kvm, vpidx);
|
||||
if (!vcpu)
|
||||
return NULL;
|
||||
synic = vcpu_to_synic(vcpu);
|
||||
synic = to_hv_synic(vcpu);
|
||||
return (synic->active) ? synic : NULL;
|
||||
}
|
||||
|
||||
static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
||||
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
||||
struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
struct kvm_vcpu_hv_stimer *stimer;
|
||||
int gsi, idx;
|
||||
|
||||
@ -189,8 +193,8 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
|
||||
|
||||
static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
|
||||
struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
|
||||
struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
|
||||
hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
|
||||
hv_vcpu->exit.u.synic.msr = msr;
|
||||
@ -204,7 +208,7 @@ static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
|
||||
static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
|
||||
u32 msr, u64 data, bool host)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
|
||||
struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
|
||||
int ret;
|
||||
|
||||
if (!synic->active && !host)
|
||||
@ -282,8 +286,7 @@ static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_hv *hv = &kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
|
||||
|
||||
if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
|
||||
hv->hv_syndbg.control.status =
|
||||
@ -293,8 +296,8 @@ static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
|
||||
{
|
||||
struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
|
||||
struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
|
||||
struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
|
||||
hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG;
|
||||
hv_vcpu->exit.u.syndbg.msr = msr;
|
||||
@ -310,13 +313,13 @@ static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
|
||||
|
||||
static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
||||
{
|
||||
struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
|
||||
struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
|
||||
|
||||
if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
|
||||
return 1;
|
||||
|
||||
trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id,
|
||||
vcpu_to_hv_vcpu(vcpu)->vp_index, msr, data);
|
||||
to_hv_vcpu(vcpu)->vp_index, msr, data);
|
||||
switch (msr) {
|
||||
case HV_X64_MSR_SYNDBG_CONTROL:
|
||||
syndbg->control.control = data;
|
||||
@ -349,7 +352,7 @@ static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
||||
|
||||
static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
|
||||
{
|
||||
struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
|
||||
struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
|
||||
|
||||
if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
|
||||
return 1;
|
||||
@ -377,9 +380,7 @@ static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
|
||||
break;
|
||||
}
|
||||
|
||||
trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id,
|
||||
vcpu_to_hv_vcpu(vcpu)->vp_index, msr,
|
||||
*pdata);
|
||||
trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, kvm_hv_get_vpindex(vcpu), msr, *pdata);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -421,7 +422,7 @@ static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
|
||||
|
||||
static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
|
||||
struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
|
||||
struct kvm_lapic_irq irq;
|
||||
int ret, vector;
|
||||
|
||||
@ -457,7 +458,7 @@ int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
|
||||
|
||||
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
|
||||
{
|
||||
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
||||
struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
|
||||
int i;
|
||||
|
||||
trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
|
||||
@ -514,7 +515,7 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic)
|
||||
|
||||
static u64 get_time_ref_counter(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_hv *hv = &kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
struct kvm_vcpu *vcpu;
|
||||
u64 tsc;
|
||||
|
||||
@ -534,10 +535,10 @@ static u64 get_time_ref_counter(struct kvm *kvm)
|
||||
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
|
||||
bool vcpu_kick)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
||||
struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
|
||||
|
||||
set_bit(stimer->index,
|
||||
vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
|
||||
to_hv_vcpu(vcpu)->stimer_pending_bitmap);
|
||||
kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
|
||||
if (vcpu_kick)
|
||||
kvm_vcpu_kick(vcpu);
|
||||
@ -545,14 +546,14 @@ static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
|
||||
|
||||
static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
||||
struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
|
||||
|
||||
trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
|
||||
trace_kvm_hv_stimer_cleanup(hv_stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index);
|
||||
|
||||
hrtimer_cancel(&stimer->timer);
|
||||
clear_bit(stimer->index,
|
||||
vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
|
||||
to_hv_vcpu(vcpu)->stimer_pending_bitmap);
|
||||
stimer->msg_pending = false;
|
||||
stimer->exp_time = 0;
|
||||
}
|
||||
@ -562,7 +563,7 @@ static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
|
||||
struct kvm_vcpu_hv_stimer *stimer;
|
||||
|
||||
stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
|
||||
trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
|
||||
trace_kvm_hv_stimer_callback(hv_stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index);
|
||||
stimer_mark_pending(stimer, true);
|
||||
|
||||
@ -579,7 +580,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
|
||||
u64 time_now;
|
||||
ktime_t ktime_now;
|
||||
|
||||
time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
|
||||
time_now = get_time_ref_counter(hv_stimer_to_vcpu(stimer)->kvm);
|
||||
ktime_now = ktime_get();
|
||||
|
||||
if (stimer->config.periodic) {
|
||||
@ -596,7 +597,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
|
||||
stimer->exp_time = time_now + stimer->count;
|
||||
|
||||
trace_kvm_hv_stimer_start_periodic(
|
||||
stimer_to_vcpu(stimer)->vcpu_id,
|
||||
hv_stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index,
|
||||
time_now, stimer->exp_time);
|
||||
|
||||
@ -618,7 +619,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
|
||||
return 0;
|
||||
}
|
||||
|
||||
trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
|
||||
trace_kvm_hv_stimer_start_one_shot(hv_stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index,
|
||||
time_now, stimer->count);
|
||||
|
||||
@ -633,13 +634,13 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
|
||||
{
|
||||
union hv_stimer_config new_config = {.as_uint64 = config},
|
||||
old_config = {.as_uint64 = stimer->config.as_uint64};
|
||||
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
||||
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
||||
struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
|
||||
struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
|
||||
|
||||
if (!synic->active && !host)
|
||||
return 1;
|
||||
|
||||
trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
|
||||
trace_kvm_hv_stimer_set_config(hv_stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index, config, host);
|
||||
|
||||
stimer_cleanup(stimer);
|
||||
@ -657,13 +658,13 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
|
||||
static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
|
||||
bool host)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
||||
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
||||
struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
|
||||
struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
|
||||
|
||||
if (!synic->active && !host)
|
||||
return 1;
|
||||
|
||||
trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
|
||||
trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index, count, host);
|
||||
|
||||
stimer_cleanup(stimer);
|
||||
@ -694,7 +695,7 @@ static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
|
||||
static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
|
||||
struct hv_message *src_msg, bool no_retry)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
|
||||
struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
|
||||
int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
|
||||
gfn_t msg_page_gfn;
|
||||
struct hv_message_header hv_hdr;
|
||||
@ -750,7 +751,7 @@ static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
|
||||
|
||||
static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
||||
struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
|
||||
struct hv_message *msg = &stimer->msg;
|
||||
struct hv_timer_message_payload *payload =
|
||||
(struct hv_timer_message_payload *)&msg->u.payload;
|
||||
@ -763,14 +764,14 @@ static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
|
||||
|
||||
payload->expiration_time = stimer->exp_time;
|
||||
payload->delivery_time = get_time_ref_counter(vcpu->kvm);
|
||||
return synic_deliver_msg(vcpu_to_synic(vcpu),
|
||||
return synic_deliver_msg(to_hv_synic(vcpu),
|
||||
stimer->config.sintx, msg,
|
||||
no_retry);
|
||||
}
|
||||
|
||||
static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
||||
struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
|
||||
struct kvm_lapic_irq irq = {
|
||||
.delivery_mode = APIC_DM_FIXED,
|
||||
.vector = stimer->config.apic_vector
|
||||
@ -790,7 +791,7 @@ static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
|
||||
r = stimer_send_msg(stimer);
|
||||
else
|
||||
r = stimer_notify_direct(stimer);
|
||||
trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
|
||||
trace_kvm_hv_stimer_expiration(hv_stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index, direct, r);
|
||||
if (!r) {
|
||||
stimer->msg_pending = false;
|
||||
@ -801,11 +802,14 @@ static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
|
||||
|
||||
void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
struct kvm_vcpu_hv_stimer *stimer;
|
||||
u64 time_now, exp_time;
|
||||
int i;
|
||||
|
||||
if (!hv_vcpu)
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
|
||||
if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
|
||||
stimer = &hv_vcpu->stimer[i];
|
||||
@ -831,16 +835,27 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
int i;
|
||||
|
||||
if (!hv_vcpu)
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
|
||||
stimer_cleanup(&hv_vcpu->stimer[i]);
|
||||
|
||||
kfree(hv_vcpu);
|
||||
vcpu->arch.hyperv = NULL;
|
||||
}
|
||||
|
||||
bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
|
||||
if (!hv_vcpu)
|
||||
return false;
|
||||
|
||||
if (!(hv_vcpu->hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
|
||||
return false;
|
||||
return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
|
||||
}
|
||||
@ -880,28 +895,41 @@ static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
|
||||
stimer_prepare_msg(stimer);
|
||||
}
|
||||
|
||||
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
||||
struct kvm_vcpu_hv *hv_vcpu;
|
||||
int i;
|
||||
|
||||
hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT);
|
||||
if (!hv_vcpu)
|
||||
return -ENOMEM;
|
||||
|
||||
vcpu->arch.hyperv = hv_vcpu;
|
||||
hv_vcpu->vcpu = vcpu;
|
||||
|
||||
synic_init(&hv_vcpu->synic);
|
||||
|
||||
bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
|
||||
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
|
||||
stimer_init(&hv_vcpu->stimer[i], i);
|
||||
}
|
||||
|
||||
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
||||
|
||||
hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
|
||||
{
|
||||
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
||||
struct kvm_vcpu_hv_synic *synic;
|
||||
int r;
|
||||
|
||||
if (!to_hv_vcpu(vcpu)) {
|
||||
r = kvm_hv_vcpu_init(vcpu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
synic = to_hv_synic(vcpu);
|
||||
|
||||
/*
|
||||
* Hyper-V SynIC auto EOI SINT's are
|
||||
@ -939,10 +967,9 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
|
||||
u32 index, u64 *pdata)
|
||||
static int kvm_hv_msr_get_crash_data(struct kvm *kvm, u32 index, u64 *pdata)
|
||||
{
|
||||
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
size_t size = ARRAY_SIZE(hv->hv_crash_param);
|
||||
|
||||
if (WARN_ON_ONCE(index >= size))
|
||||
@ -952,41 +979,26 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
|
||||
static int kvm_hv_msr_get_crash_ctl(struct kvm *kvm, u64 *pdata)
|
||||
{
|
||||
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
|
||||
*pdata = hv->hv_crash_ctl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
|
||||
static int kvm_hv_msr_set_crash_ctl(struct kvm *kvm, u64 data)
|
||||
{
|
||||
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
|
||||
if (host)
|
||||
hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;
|
||||
|
||||
if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) {
|
||||
|
||||
vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
|
||||
hv->hv_crash_param[0],
|
||||
hv->hv_crash_param[1],
|
||||
hv->hv_crash_param[2],
|
||||
hv->hv_crash_param[3],
|
||||
hv->hv_crash_param[4]);
|
||||
|
||||
/* Send notification about crash to user space */
|
||||
kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
|
||||
}
|
||||
hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
|
||||
u32 index, u64 data)
|
||||
static int kvm_hv_msr_set_crash_data(struct kvm *kvm, u32 index, u64 data)
|
||||
{
|
||||
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
size_t size = ARRAY_SIZE(hv->hv_crash_param);
|
||||
|
||||
if (WARN_ON_ONCE(index >= size))
|
||||
@ -1068,7 +1080,7 @@ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
|
||||
void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
||||
struct pvclock_vcpu_time_info *hv_clock)
|
||||
{
|
||||
struct kvm_hv *hv = &kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
u32 tsc_seq;
|
||||
u64 gfn;
|
||||
|
||||
@ -1078,7 +1090,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
||||
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
|
||||
return;
|
||||
|
||||
mutex_lock(&kvm->arch.hyperv.hv_lock);
|
||||
mutex_lock(&hv->hv_lock);
|
||||
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
|
||||
goto out_unlock;
|
||||
|
||||
@ -1122,14 +1134,14 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
||||
kvm_write_guest(kvm, gfn_to_gpa(gfn),
|
||||
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
|
||||
out_unlock:
|
||||
mutex_unlock(&kvm->arch.hyperv.hv_lock);
|
||||
mutex_unlock(&hv->hv_lock);
|
||||
}
|
||||
|
||||
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
||||
bool host)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_hv *hv = &kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
|
||||
switch (msr) {
|
||||
case HV_X64_MSR_GUEST_OS_ID:
|
||||
@ -1139,9 +1151,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
||||
hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
|
||||
break;
|
||||
case HV_X64_MSR_HYPERCALL: {
|
||||
u64 gfn;
|
||||
unsigned long addr;
|
||||
u8 instructions[4];
|
||||
u8 instructions[9];
|
||||
int i = 0;
|
||||
u64 addr;
|
||||
|
||||
/* if guest os id is not set hypercall should remain disabled */
|
||||
if (!hv->hv_guest_os_id)
|
||||
@ -1150,16 +1162,33 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
||||
hv->hv_hypercall = data;
|
||||
break;
|
||||
}
|
||||
gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
|
||||
addr = gfn_to_hva(kvm, gfn);
|
||||
if (kvm_is_error_hva(addr))
|
||||
return 1;
|
||||
kvm_x86_ops.patch_hypercall(vcpu, instructions);
|
||||
((unsigned char *)instructions)[3] = 0xc3; /* ret */
|
||||
if (__copy_to_user((void __user *)addr, instructions, 4))
|
||||
|
||||
		/*
		 * If Xen and Hyper-V hypercalls are both enabled, disambiguate
		 * the same way Xen itself does, by setting the bit 31 of EAX
		 * which is RsvdZ in the 32-bit Hyper-V hypercall ABI and just
		 * going to be clobbered on 64-bit.
		 */
		if (kvm_xen_hypercall_enabled(kvm)) {
			/* orl $0x80000000, %eax */
			instructions[i++] = 0x0d;
			instructions[i++] = 0x00;
			instructions[i++] = 0x00;
			instructions[i++] = 0x00;
			instructions[i++] = 0x80;
		}

		/* vmcall/vmmcall */
		static_call(kvm_x86_patch_hypercall)(vcpu, instructions + i);
		i += 3;

		/* ret */
		((unsigned char *)instructions)[i++] = 0xc3;

		addr = data & HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK;
		if (kvm_vcpu_write_guest(vcpu, addr, instructions, i))
			return 1;
		hv->hv_hypercall = data;
		mark_page_dirty(kvm, gfn);
		break;
	}
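
The patched page is at most 9 bytes, which is why the on-stack buffer grows from instructions[4] to instructions[9]. A sketch of the resulting bytes on an Intel host, assuming the vendor patch_hypercall() callback emits VMCALL there (AMD would get VMMCALL, 0f 01 d9):

/* Xen hypercalls enabled alongside Hyper-V: */
static const unsigned char hv_hypercall_page_with_xen[] = {
	0x0d, 0x00, 0x00, 0x00, 0x80,	/* orl $0x80000000, %eax */
	0x0f, 0x01, 0xc1,		/* vmcall                */
	0xc3,				/* ret                   */
};

/* Hyper-V only (the pre-existing 4-byte layout): */
static const unsigned char hv_hypercall_page_plain[] = {
	0x0f, 0x01, 0xc1,		/* vmcall */
	0xc3,				/* ret    */
};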
|
||||
case HV_X64_MSR_REFERENCE_TSC:
|
||||
@ -1168,11 +1197,25 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
||||
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
|
||||
break;
|
||||
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
|
||||
return kvm_hv_msr_set_crash_data(vcpu,
|
||||
return kvm_hv_msr_set_crash_data(kvm,
|
||||
msr - HV_X64_MSR_CRASH_P0,
|
||||
data);
|
||||
case HV_X64_MSR_CRASH_CTL:
|
||||
return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
|
||||
if (host)
|
||||
return kvm_hv_msr_set_crash_ctl(kvm, data);
|
||||
|
||||
if (data & HV_CRASH_CTL_CRASH_NOTIFY) {
|
||||
vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
|
||||
hv->hv_crash_param[0],
|
||||
hv->hv_crash_param[1],
|
||||
hv->hv_crash_param[2],
|
||||
hv->hv_crash_param[3],
|
||||
hv->hv_crash_param[4]);
|
||||
|
||||
/* Send notification about crash to user space */
|
||||
kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
|
||||
}
|
||||
break;
|
||||
case HV_X64_MSR_RESET:
|
||||
if (data == 1) {
|
||||
vcpu_debug(vcpu, "hyper-v reset requested\n");
|
||||
@ -1216,11 +1259,11 @@ static u64 current_task_runtime_100ns(void)
|
||||
|
||||
static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
||||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
|
||||
switch (msr) {
|
||||
case HV_X64_MSR_VP_INDEX: {
|
||||
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
|
||||
int vcpu_idx = kvm_vcpu_get_idx(vcpu);
|
||||
u32 new_vp_index = (u32)data;
|
||||
|
||||
@ -1291,14 +1334,14 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
||||
case HV_X64_MSR_SIMP:
|
||||
case HV_X64_MSR_EOM:
|
||||
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
|
||||
return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
|
||||
return synic_set_msr(to_hv_synic(vcpu), msr, data, host);
|
||||
case HV_X64_MSR_STIMER0_CONFIG:
|
||||
case HV_X64_MSR_STIMER1_CONFIG:
|
||||
case HV_X64_MSR_STIMER2_CONFIG:
|
||||
case HV_X64_MSR_STIMER3_CONFIG: {
|
||||
int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
|
||||
|
||||
return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
|
||||
return stimer_set_config(to_hv_stimer(vcpu, timer_index),
|
||||
data, host);
|
||||
}
|
||||
case HV_X64_MSR_STIMER0_COUNT:
|
||||
@ -1307,7 +1350,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
||||
case HV_X64_MSR_STIMER3_COUNT: {
|
||||
int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
|
||||
|
||||
return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
|
||||
return stimer_set_count(to_hv_stimer(vcpu, timer_index),
|
||||
data, host);
|
||||
}
|
||||
case HV_X64_MSR_TSC_FREQUENCY:
|
||||
@ -1330,7 +1373,7 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
|
||||
{
|
||||
u64 data = 0;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_hv *hv = &kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
|
||||
switch (msr) {
|
||||
case HV_X64_MSR_GUEST_OS_ID:
|
||||
@ -1346,11 +1389,11 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
|
||||
data = hv->hv_tsc_page;
|
||||
break;
|
||||
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
|
||||
return kvm_hv_msr_get_crash_data(vcpu,
|
||||
return kvm_hv_msr_get_crash_data(kvm,
|
||||
msr - HV_X64_MSR_CRASH_P0,
|
||||
pdata);
|
||||
case HV_X64_MSR_CRASH_CTL:
|
||||
return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
|
||||
return kvm_hv_msr_get_crash_ctl(kvm, pdata);
|
||||
case HV_X64_MSR_RESET:
|
||||
data = 0;
|
||||
break;
|
||||
@ -1379,7 +1422,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
|
||||
bool host)
|
||||
{
|
||||
u64 data = 0;
|
||||
struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
|
||||
switch (msr) {
|
||||
case HV_X64_MSR_VP_INDEX:
|
||||
@ -1403,14 +1446,14 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
|
||||
case HV_X64_MSR_SIMP:
|
||||
case HV_X64_MSR_EOM:
|
||||
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
|
||||
return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
|
||||
return synic_get_msr(to_hv_synic(vcpu), msr, pdata, host);
|
||||
case HV_X64_MSR_STIMER0_CONFIG:
|
||||
case HV_X64_MSR_STIMER1_CONFIG:
|
||||
case HV_X64_MSR_STIMER2_CONFIG:
|
||||
case HV_X64_MSR_STIMER3_CONFIG: {
|
||||
int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
|
||||
|
||||
return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
|
||||
return stimer_get_config(to_hv_stimer(vcpu, timer_index),
|
||||
pdata);
|
||||
}
|
||||
case HV_X64_MSR_STIMER0_COUNT:
|
||||
@ -1419,7 +1462,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
|
||||
case HV_X64_MSR_STIMER3_COUNT: {
|
||||
int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
|
||||
|
||||
return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
|
||||
return stimer_get_count(to_hv_stimer(vcpu, timer_index),
|
||||
pdata);
|
||||
}
|
||||
case HV_X64_MSR_TSC_FREQUENCY:
|
||||
@ -1438,12 +1481,22 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
|
||||
|
||||
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
||||
{
|
||||
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
|
||||
|
||||
if (!host && !vcpu->arch.hyperv_enabled)
|
||||
return 1;
|
||||
|
||||
if (!to_hv_vcpu(vcpu)) {
|
||||
if (kvm_hv_vcpu_init(vcpu))
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (kvm_hv_msr_partition_wide(msr)) {
|
||||
int r;
|
||||
|
||||
mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
|
||||
mutex_lock(&hv->hv_lock);
|
||||
r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
|
||||
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
|
||||
mutex_unlock(&hv->hv_lock);
|
||||
return r;
|
||||
} else
|
||||
return kvm_hv_set_msr(vcpu, msr, data, host);
|
||||
@ -1451,12 +1504,22 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
|
||||
|
||||
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
|
||||
{
|
||||
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
|
||||
|
||||
if (!host && !vcpu->arch.hyperv_enabled)
|
||||
return 1;
|
||||
|
||||
if (!to_hv_vcpu(vcpu)) {
|
||||
if (kvm_hv_vcpu_init(vcpu))
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (kvm_hv_msr_partition_wide(msr)) {
|
||||
int r;
|
||||
|
||||
mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
|
||||
mutex_lock(&hv->hv_lock);
|
||||
r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
|
||||
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
|
||||
mutex_unlock(&hv->hv_lock);
|
||||
return r;
|
||||
} else
|
||||
return kvm_hv_get_msr(vcpu, msr, pdata, host);
|
||||
@ -1466,7 +1529,7 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
|
||||
struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
|
||||
u64 *vp_bitmap, unsigned long *vcpu_bitmap)
|
||||
{
|
||||
struct kvm_hv *hv = &kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i, bank, sbank = 0;
|
||||
|
||||
@ -1483,18 +1546,16 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
|
||||
|
||||
bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
|
||||
(unsigned long *)vp_bitmap))
|
||||
if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
|
||||
__set_bit(i, vcpu_bitmap);
|
||||
}
|
||||
return vcpu_bitmap;
|
||||
}
|
||||
|
||||
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
u16 rep_cnt, bool ex)
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, bool ex)
{
struct kvm *kvm = current_vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct hv_tlb_flush_ex flush_ex;
struct hv_tlb_flush flush;
u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
@@ -1592,10 +1653,10 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
}
|
||||
}
|
||||
|
||||
static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
|
||||
static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 ingpa, u64 outgpa,
|
||||
bool ex, bool fast)
|
||||
{
|
||||
struct kvm *kvm = current_vcpu->kvm;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct hv_send_ipi_ex send_ipi_ex;
|
||||
struct hv_send_ipi send_ipi;
|
||||
u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
|
||||
@@ -1666,9 +1727,20 @@ ret_success:
return HV_STATUS_SUCCESS;
}

bool kvm_hv_hypercall_enabled(struct kvm *kvm)
void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
{
return READ_ONCE(kvm->arch.hyperv.hv_guest_os_id) != 0;
struct kvm_cpuid_entry2 *entry;

entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX)
vcpu->arch.hyperv_enabled = true;
else
vcpu->arch.hyperv_enabled = false;
}

bool kvm_hv_hypercall_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.hyperv_enabled && to_kvm_hv(vcpu->kvm)->hv_guest_os_id;
}

static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
|
||||
@ -1698,6 +1770,7 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
|
||||
|
||||
static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
|
||||
{
|
||||
struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
|
||||
struct eventfd_ctx *eventfd;
|
||||
|
||||
if (unlikely(!fast)) {
|
||||
@ -1726,7 +1799,7 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
|
||||
|
||||
/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
|
||||
rcu_read_lock();
|
||||
eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
|
||||
eventfd = idr_find(&hv->conn_to_evt, param);
|
||||
rcu_read_unlock();
|
||||
if (!eventfd)
|
||||
return HV_STATUS_INVALID_PORT_ID;
|
||||
@ -1745,7 +1818,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
|
||||
* hypercall generates UD from non zero cpl and real mode
|
||||
* per HYPER-V spec
|
||||
*/
|
||||
if (kvm_x86_ops.get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
|
||||
if (static_call(kvm_x86_get_cpl)(vcpu) != 0 || !is_protmode(vcpu)) {
|
||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||
return 1;
|
||||
}
|
||||
@ -1793,7 +1866,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
|
||||
fallthrough; /* maybe userspace knows this conn_id */
|
||||
case HVCALL_POST_MESSAGE:
|
||||
/* don't bother userspace if it has no way to handle it */
|
||||
if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
|
||||
if (unlikely(rep || !to_hv_synic(vcpu)->active)) {
|
||||
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
|
||||
break;
|
||||
}
|
||||
@ -1855,7 +1928,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
fallthrough;
|
||||
case HVCALL_RESET_DEBUG_SESSION: {
|
||||
struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu);
|
||||
struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
|
||||
|
||||
if (!kvm_hv_is_syndbg_enabled(vcpu)) {
|
||||
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
|
||||
@ -1885,23 +1958,26 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
|
||||
|
||||
void kvm_hv_init_vm(struct kvm *kvm)
|
||||
{
|
||||
mutex_init(&kvm->arch.hyperv.hv_lock);
|
||||
idr_init(&kvm->arch.hyperv.conn_to_evt);
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
|
||||
mutex_init(&hv->hv_lock);
|
||||
idr_init(&hv->conn_to_evt);
|
||||
}
|
||||
|
||||
void kvm_hv_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
struct eventfd_ctx *eventfd;
|
||||
int i;
|
||||
|
||||
idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
|
||||
idr_for_each_entry(&hv->conn_to_evt, eventfd, i)
|
||||
eventfd_ctx_put(eventfd);
|
||||
idr_destroy(&kvm->arch.hyperv.conn_to_evt);
|
||||
idr_destroy(&hv->conn_to_evt);
|
||||
}
|
||||
|
||||
static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
|
||||
{
|
||||
struct kvm_hv *hv = &kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
struct eventfd_ctx *eventfd;
|
||||
int ret;
|
||||
|
||||
@ -1925,7 +2001,7 @@ static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
|
||||
|
||||
static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
|
||||
{
|
||||
struct kvm_hv *hv = &kvm->arch.hyperv;
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
struct eventfd_ctx *eventfd;
|
||||
|
||||
mutex_lock(&hv->hv_lock);
|
||||
@ -1997,8 +2073,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
|
||||
break;
|
||||
|
||||
case HYPERV_CPUID_INTERFACE:
|
||||
memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
|
||||
ent->eax = signature[0];
|
||||
ent->eax = HYPERV_CPUID_SIGNATURE_EAX;
|
||||
break;
|
||||
|
||||
case HYPERV_CPUID_VERSION:
|
||||
|
@@ -50,38 +50,46 @@
/* Hyper-V HV_X64_MSR_SYNDBG_OPTIONS bits */
#define HV_X64_SYNDBG_OPTION_USE_HCALLS BIT(2)

static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu)
static inline struct kvm_hv *to_kvm_hv(struct kvm *kvm)
{
return &vcpu->arch.hyperv;
return &kvm->arch.hyperv;
}

static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu)
static inline struct kvm_vcpu_hv *to_hv_vcpu(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_arch *arch;

arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv);
return container_of(arch, struct kvm_vcpu, arch);
return vcpu->arch.hyperv;
}

static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu)
static inline struct kvm_vcpu_hv_synic *to_hv_synic(struct kvm_vcpu *vcpu)
{
return &vcpu->arch.hyperv.synic;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

return &hv_vcpu->synic;
}

static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
static inline struct kvm_vcpu *hv_synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
{
return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic));
struct kvm_vcpu_hv *hv_vcpu = container_of(synic, struct kvm_vcpu_hv, synic);

return hv_vcpu->vcpu;
}

static inline struct kvm_hv_syndbg *vcpu_to_hv_syndbg(struct kvm_vcpu *vcpu)
static inline struct kvm_hv_syndbg *to_hv_syndbg(struct kvm_vcpu *vcpu)
{
return &vcpu->kvm->arch.hyperv.hv_syndbg;
}

static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

return hv_vcpu ? hv_vcpu->vp_index : kvm_vcpu_get_idx(vcpu);
}

int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host);

bool kvm_hv_hypercall_enabled(struct kvm *kvm);
bool kvm_hv_hypercall_enabled(struct kvm_vcpu *vcpu);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu);

void kvm_hv_irq_routing_update(struct kvm *kvm);
@@ -89,32 +97,35 @@ int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
|
||||
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
|
||||
|
||||
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
|
||||
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
|
||||
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
|
||||
|
||||
bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
|
||||
bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
|
||||
struct hv_vp_assist_page *assist_page);
|
||||
|
||||
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
|
||||
int timer_index)
|
||||
static inline struct kvm_vcpu_hv_stimer *to_hv_stimer(struct kvm_vcpu *vcpu,
|
||||
int timer_index)
|
||||
{
|
||||
return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index];
|
||||
return &to_hv_vcpu(vcpu)->stimer[timer_index];
|
||||
}
|
||||
|
||||
static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer)
|
||||
static inline struct kvm_vcpu *hv_stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer)
|
||||
{
|
||||
struct kvm_vcpu_hv *hv_vcpu;
|
||||
|
||||
hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv,
|
||||
stimer[0]);
|
||||
return hv_vcpu_to_vcpu(hv_vcpu);
|
||||
return hv_vcpu->vcpu;
|
||||
}
|
||||
|
||||
static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return !bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap,
|
||||
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
|
||||
|
||||
if (!hv_vcpu)
|
||||
return false;
|
||||
|
||||
return !bitmap_empty(hv_vcpu->stimer_pending_bitmap,
|
||||
HV_SYNIC_STIMER_COUNT);
|
||||
}
|
||||
|
||||
@ -125,6 +136,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
||||
|
||||
void kvm_hv_init_vm(struct kvm *kvm);
|
||||
void kvm_hv_destroy_vm(struct kvm *kvm);
|
||||
void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu);
|
||||
int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
|
||||
int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
|
||||
struct kvm_cpuid_entry2 __user *entries);
|
||||
|
@@ -14,6 +14,7 @@
#include "irq.h"
#include "i8254.h"
#include "x86.h"
#include "xen.h"

/*
* check if there are pending timer events
@@ -56,6 +57,9 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v)
if (!lapic_in_kernel(v))
return v->arch.interrupt.injected;

if (kvm_xen_has_interrupt(v))
return 1;

if (!kvm_apic_accept_pic_intr(v))
return 0;

@@ -110,6 +114,9 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
if (!lapic_in_kernel(v))
return v->arch.interrupt.nr;

if (kvm_xen_has_interrupt(v))
return v->kvm->arch.xen.upcall_vector;

if (irqchip_split(v->kvm)) {
int vector = v->arch.pending_external_vector;

@@ -143,8 +150,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
__kvm_migrate_apic_timer(vcpu);
__kvm_migrate_pit_timer(vcpu);
if (kvm_x86_ops.migrate_timers)
kvm_x86_ops.migrate_timers(vcpu);
static_call_cond(kvm_x86_migrate_timers)(vcpu);
}

bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
|
@ -68,7 +68,7 @@ static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
|
||||
return 0;
|
||||
|
||||
if (!kvm_register_is_available(vcpu, reg))
|
||||
kvm_x86_ops.cache_reg(vcpu, reg);
|
||||
static_call(kvm_x86_cache_reg)(vcpu, reg);
|
||||
|
||||
return vcpu->arch.regs[reg];
|
||||
}
|
||||
@ -108,7 +108,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
|
||||
might_sleep(); /* on svm */
|
||||
|
||||
if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
|
||||
kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_PDPTR);
|
||||
static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_PDPTR);
|
||||
|
||||
return vcpu->arch.walk_mmu->pdptrs[index];
|
||||
}
|
||||
@ -118,7 +118,7 @@ static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
|
||||
ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
|
||||
if ((tmask & vcpu->arch.cr0_guest_owned_bits) &&
|
||||
!kvm_register_is_available(vcpu, VCPU_EXREG_CR0))
|
||||
kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR0);
|
||||
static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_CR0);
|
||||
return vcpu->arch.cr0 & mask;
|
||||
}
|
||||
|
||||
@ -132,14 +132,14 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
|
||||
ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
|
||||
if ((tmask & vcpu->arch.cr4_guest_owned_bits) &&
|
||||
!kvm_register_is_available(vcpu, VCPU_EXREG_CR4))
|
||||
kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR4);
|
||||
static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_CR4);
|
||||
return vcpu->arch.cr4 & mask;
|
||||
}
|
||||
|
||||
static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
|
||||
kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR3);
|
||||
static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_CR3);
|
||||
return vcpu->arch.cr3;
|
||||
}
|
||||
|
||||
|
@@ -205,7 +205,7 @@ struct x86_emulate_ops {
ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
int (*cpl)(struct x86_emulate_ctxt *ctxt);
int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
|
@@ -91,8 +91,8 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);

static inline int apic_enabled(struct kvm_lapic *apic)
{
@ -290,9 +290,9 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
|
||||
if (enabled != apic->sw_enabled) {
|
||||
apic->sw_enabled = enabled;
|
||||
if (enabled)
|
||||
static_key_slow_dec_deferred(&apic_sw_disabled);
|
||||
static_branch_slow_dec_deferred(&apic_sw_disabled);
|
||||
else
|
||||
static_key_slow_inc(&apic_sw_disabled.key);
|
||||
static_branch_inc(&apic_sw_disabled.key);
|
||||
|
||||
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
|
||||
}
|
||||
@ -484,7 +484,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
|
||||
if (unlikely(vcpu->arch.apicv_active)) {
|
||||
/* need to update RVI */
|
||||
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
|
||||
kvm_x86_ops.hwapic_irr_update(vcpu,
|
||||
static_call(kvm_x86_hwapic_irr_update)(vcpu,
|
||||
apic_find_highest_irr(apic));
|
||||
} else {
|
||||
apic->irr_pending = false;
|
||||
@ -515,7 +515,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
|
||||
* just set SVI.
|
||||
*/
|
||||
if (unlikely(vcpu->arch.apicv_active))
|
||||
kvm_x86_ops.hwapic_isr_update(vcpu, vec);
|
||||
static_call(kvm_x86_hwapic_isr_update)(vcpu, vec);
|
||||
else {
|
||||
++apic->isr_count;
|
||||
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
|
||||
@ -563,8 +563,8 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
|
||||
* and must be left alone.
|
||||
*/
|
||||
if (unlikely(vcpu->arch.apicv_active))
|
||||
kvm_x86_ops.hwapic_isr_update(vcpu,
|
||||
apic_find_highest_isr(apic));
|
||||
static_call(kvm_x86_hwapic_isr_update)(vcpu,
|
||||
apic_find_highest_isr(apic));
|
||||
else {
|
||||
--apic->isr_count;
|
||||
BUG_ON(apic->isr_count < 0);
|
||||
@ -701,7 +701,7 @@ static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
|
||||
{
|
||||
int highest_irr;
|
||||
if (apic->vcpu->arch.apicv_active)
|
||||
highest_irr = kvm_x86_ops.sync_pir_to_irr(apic->vcpu);
|
||||
highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
|
||||
else
|
||||
highest_irr = apic_find_highest_irr(apic);
|
||||
if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
|
||||
@ -1090,7 +1090,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
|
||||
apic->regs + APIC_TMR);
|
||||
}
|
||||
|
||||
if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) {
|
||||
if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
|
||||
kvm_lapic_set_irr(vector, apic);
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
@ -1245,7 +1245,8 @@ static int apic_set_eoi(struct kvm_lapic *apic)
|
||||
apic_clear_isr(vector, apic);
|
||||
apic_update_ppr(apic);
|
||||
|
||||
if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
|
||||
if (to_hv_vcpu(apic->vcpu) &&
|
||||
test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
|
||||
kvm_hv_synic_send_eoi(apic->vcpu, vector);
|
||||
|
||||
kvm_ioapic_send_eoi(apic, vector);
|
||||
@ -1814,7 +1815,7 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
|
||||
{
|
||||
WARN_ON(preemptible());
|
||||
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
|
||||
kvm_x86_ops.cancel_hv_timer(apic->vcpu);
|
||||
static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
|
||||
apic->lapic_timer.hv_timer_in_use = false;
|
||||
}
|
||||
|
||||
@ -1831,7 +1832,7 @@ static bool start_hv_timer(struct kvm_lapic *apic)
|
||||
if (!ktimer->tscdeadline)
|
||||
return false;
|
||||
|
||||
if (kvm_x86_ops.set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
|
||||
if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
|
||||
return false;
|
||||
|
||||
ktimer->hv_timer_in_use = true;
|
||||
@ -2175,10 +2176,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
|
||||
if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
|
||||
static_key_slow_dec_deferred(&apic_hw_disabled);
|
||||
static_branch_slow_dec_deferred(&apic_hw_disabled);
|
||||
|
||||
if (!apic->sw_enabled)
|
||||
static_key_slow_dec_deferred(&apic_sw_disabled);
|
||||
static_branch_slow_dec_deferred(&apic_sw_disabled);
|
||||
|
||||
if (apic->regs)
|
||||
free_page((unsigned long)apic->regs);
|
||||
@ -2250,9 +2251,9 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
|
||||
if (value & MSR_IA32_APICBASE_ENABLE) {
|
||||
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
|
||||
static_key_slow_dec_deferred(&apic_hw_disabled);
|
||||
static_branch_slow_dec_deferred(&apic_hw_disabled);
|
||||
} else {
|
||||
static_key_slow_inc(&apic_hw_disabled.key);
|
||||
static_branch_inc(&apic_hw_disabled.key);
|
||||
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
|
||||
}
|
||||
}
|
||||
@ -2261,7 +2262,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
|
||||
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
|
||||
|
||||
if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
|
||||
kvm_x86_ops.set_virtual_apic_mode(vcpu);
|
||||
static_call(kvm_x86_set_virtual_apic_mode)(vcpu);
|
||||
|
||||
apic->base_address = apic->vcpu->arch.apic_base &
|
||||
MSR_IA32_APICBASE_BASE;
|
||||
@ -2338,9 +2339,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
vcpu->arch.pv_eoi.msr_val = 0;
|
||||
apic_update_ppr(apic);
|
||||
if (vcpu->arch.apicv_active) {
|
||||
kvm_x86_ops.apicv_post_state_restore(vcpu);
|
||||
kvm_x86_ops.hwapic_irr_update(vcpu, -1);
|
||||
kvm_x86_ops.hwapic_isr_update(vcpu, -1);
|
||||
static_call(kvm_x86_apicv_post_state_restore)(vcpu);
|
||||
static_call(kvm_x86_hwapic_irr_update)(vcpu, -1);
|
||||
static_call(kvm_x86_hwapic_isr_update)(vcpu, -1);
|
||||
}
|
||||
|
||||
vcpu->arch.apic_arb_prio = 0;
|
||||
@ -2449,7 +2450,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
|
||||
* thinking that APIC state has changed.
|
||||
*/
|
||||
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
|
||||
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
|
||||
static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
|
||||
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
|
||||
|
||||
return 0;
|
||||
@ -2512,7 +2513,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
|
||||
apic_clear_irr(vector, apic);
|
||||
if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
|
||||
if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
|
||||
/*
|
||||
* For auto-EOI interrupts, there might be another pending
|
||||
* interrupt above PPR, so check whether to raise another
|
||||
@ -2601,10 +2602,10 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
|
||||
kvm_apic_update_apicv(vcpu);
|
||||
apic->highest_isr_cache = -1;
|
||||
if (vcpu->arch.apicv_active) {
|
||||
kvm_x86_ops.apicv_post_state_restore(vcpu);
|
||||
kvm_x86_ops.hwapic_irr_update(vcpu,
|
||||
static_call(kvm_x86_apicv_post_state_restore)(vcpu);
|
||||
static_call(kvm_x86_hwapic_irr_update)(vcpu,
|
||||
apic_find_highest_irr(apic));
|
||||
kvm_x86_ops.hwapic_isr_update(vcpu,
|
||||
static_call(kvm_x86_hwapic_isr_update)(vcpu,
|
||||
apic_find_highest_isr(apic));
|
||||
}
|
||||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
@ -2904,13 +2905,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_lapic_init(void)
|
||||
{
|
||||
/* do not patch jump label more than once per second */
|
||||
jump_label_rate_limit(&apic_hw_disabled, HZ);
|
||||
jump_label_rate_limit(&apic_sw_disabled, HZ);
|
||||
}
|
||||
|
||||
void kvm_lapic_exit(void)
|
||||
{
|
||||
static_key_deferred_flush(&apic_hw_disabled);
|
||||
|
@ -6,6 +6,8 @@
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include "hyperv.h"
|
||||
|
||||
#define KVM_APIC_INIT 0
|
||||
#define KVM_APIC_SIPI 1
|
||||
#define KVM_APIC_LVT_NUM 6
|
||||
@ -125,13 +127,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
|
||||
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
|
||||
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
|
||||
|
||||
static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
|
||||
}
|
||||
|
||||
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
|
||||
void kvm_lapic_init(void);
|
||||
void kvm_lapic_exit(void);
|
||||
|
||||
#define VEC_POS(v) ((v) & (32 - 1))
|
||||
@ -172,29 +168,29 @@ static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 va
|
||||
__kvm_lapic_set_reg(apic->regs, reg_off, val);
|
||||
}
|
||||
|
||||
extern struct static_key kvm_no_apic_vcpu;
|
||||
DECLARE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
|
||||
|
||||
static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (static_key_false(&kvm_no_apic_vcpu))
|
||||
if (static_branch_unlikely(&kvm_has_noapic_vcpu))
|
||||
return vcpu->arch.apic;
|
||||
return true;
|
||||
}
|
||||
|
||||
extern struct static_key_deferred apic_hw_disabled;
|
||||
extern struct static_key_false_deferred apic_hw_disabled;
|
||||
|
||||
static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
|
||||
{
|
||||
if (static_key_false(&apic_hw_disabled.key))
|
||||
if (static_branch_unlikely(&apic_hw_disabled.key))
|
||||
return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
|
||||
return MSR_IA32_APICBASE_ENABLE;
|
||||
}
|
||||
|
||||
extern struct static_key_deferred apic_sw_disabled;
|
||||
extern struct static_key_false_deferred apic_sw_disabled;
|
||||
|
||||
static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
|
||||
{
|
||||
if (static_key_false(&apic_sw_disabled.key))
|
||||
if (static_branch_unlikely(&apic_sw_disabled.key))
|
||||
return apic->sw_enabled;
|
||||
return true;
|
||||
}
|
||||
|
@ -102,7 +102,7 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
|
||||
if (!VALID_PAGE(root_hpa))
|
||||
return;
|
||||
|
||||
kvm_x86_ops.load_mmu_pgd(vcpu, root_hpa | kvm_get_active_pcid(vcpu),
|
||||
static_call(kvm_x86_load_mmu_pgd)(vcpu, root_hpa | kvm_get_active_pcid(vcpu),
|
||||
vcpu->arch.mmu->shadow_root_level);
|
||||
}
|
||||
|
||||
@ -152,7 +152,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
*
|
||||
* TODO: introduce APIs to split these two cases.
|
||||
*/
|
||||
static inline int is_writable_pte(unsigned long pte)
|
||||
static inline bool is_writable_pte(unsigned long pte)
|
||||
{
|
||||
return pte & PT_WRITABLE_MASK;
|
||||
}
|
||||
@ -174,8 +174,8 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
unsigned pte_access, unsigned pte_pkey,
|
||||
unsigned pfec)
|
||||
{
|
||||
int cpl = kvm_x86_ops.get_cpl(vcpu);
|
||||
unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
|
||||
int cpl = static_call(kvm_x86_get_cpl)(vcpu);
|
||||
unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
|
||||
|
||||
/*
|
||||
* If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
|
||||
|
@ -190,7 +190,7 @@ static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm,
|
||||
int ret = -ENOTSUPP;
|
||||
|
||||
if (range && kvm_x86_ops.tlb_remote_flush_with_range)
|
||||
ret = kvm_x86_ops.tlb_remote_flush_with_range(kvm, range);
|
||||
ret = static_call(kvm_x86_tlb_remote_flush_with_range)(kvm, range);
|
||||
|
||||
if (ret)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
@ -844,17 +844,17 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
|
||||
int i, count = 0;
|
||||
|
||||
if (!rmap_head->val) {
|
||||
rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte);
|
||||
rmap_printk("%p %llx 0->1\n", spte, *spte);
|
||||
rmap_head->val = (unsigned long)spte;
|
||||
} else if (!(rmap_head->val & 1)) {
|
||||
rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte);
|
||||
rmap_printk("%p %llx 1->many\n", spte, *spte);
|
||||
desc = mmu_alloc_pte_list_desc(vcpu);
|
||||
desc->sptes[0] = (u64 *)rmap_head->val;
|
||||
desc->sptes[1] = spte;
|
||||
rmap_head->val = (unsigned long)desc | 1;
|
||||
++count;
|
||||
} else {
|
||||
rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
|
||||
rmap_printk("%p %llx many->many\n", spte, *spte);
|
||||
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
|
||||
while (desc->sptes[PTE_LIST_EXT-1]) {
|
||||
count += PTE_LIST_EXT;
|
||||
@ -906,14 +906,14 @@ static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
|
||||
pr_err("%s: %p 0->BUG\n", __func__, spte);
|
||||
BUG();
|
||||
} else if (!(rmap_head->val & 1)) {
|
||||
rmap_printk("%s: %p 1->0\n", __func__, spte);
|
||||
rmap_printk("%p 1->0\n", spte);
|
||||
if ((u64 *)rmap_head->val != spte) {
|
||||
pr_err("%s: %p 1->BUG\n", __func__, spte);
|
||||
BUG();
|
||||
}
|
||||
rmap_head->val = 0;
|
||||
} else {
|
||||
rmap_printk("%s: %p many->many\n", __func__, spte);
|
||||
rmap_printk("%p many->many\n", spte);
|
||||
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
|
||||
prev_desc = NULL;
|
||||
while (desc) {
|
||||
@ -1115,7 +1115,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
|
||||
!(pt_protect && spte_can_locklessly_be_made_writable(spte)))
|
||||
return false;
|
||||
|
||||
rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
|
||||
rmap_printk("spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
if (pt_protect)
|
||||
spte &= ~SPTE_MMU_WRITEABLE;
|
||||
@ -1142,7 +1142,7 @@ static bool spte_clear_dirty(u64 *sptep)
|
||||
{
|
||||
u64 spte = *sptep;
|
||||
|
||||
rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep);
|
||||
rmap_printk("spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
MMU_WARN_ON(!spte_ad_enabled(spte));
|
||||
spte &= ~shadow_dirty_mask;
|
||||
@ -1184,7 +1184,7 @@ static bool spte_set_dirty(u64 *sptep)
|
||||
{
|
||||
u64 spte = *sptep;
|
||||
|
||||
rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep);
|
||||
rmap_printk("spte %p %llx\n", sptep, *sptep);
|
||||
|
||||
/*
|
||||
* Similar to the !kvm_x86_ops.slot_disable_log_dirty case,
|
||||
@ -1225,7 +1225,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||
{
|
||||
struct kvm_rmap_head *rmap_head;
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
|
||||
slot->base_gfn + gfn_offset, mask, true);
|
||||
while (mask) {
|
||||
@ -1254,7 +1254,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
|
||||
{
|
||||
struct kvm_rmap_head *rmap_head;
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
|
||||
slot->base_gfn + gfn_offset, mask, false);
|
||||
while (mask) {
|
||||
@ -1283,8 +1283,9 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
gfn_t gfn_offset, unsigned long mask)
|
||||
{
|
||||
if (kvm_x86_ops.enable_log_dirty_pt_masked)
|
||||
kvm_x86_ops.enable_log_dirty_pt_masked(kvm, slot, gfn_offset,
|
||||
mask);
|
||||
static_call(kvm_x86_enable_log_dirty_pt_masked)(kvm, slot,
|
||||
gfn_offset,
|
||||
mask);
|
||||
else
|
||||
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
|
||||
}
|
||||
@ -1292,7 +1293,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
int kvm_cpu_dirty_log_size(void)
|
||||
{
|
||||
if (kvm_x86_ops.cpu_dirty_log_size)
|
||||
return kvm_x86_ops.cpu_dirty_log_size();
|
||||
return static_call(kvm_x86_cpu_dirty_log_size)();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1309,7 +1310,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
|
||||
write_protected |= __rmap_write_protect(kvm, rmap_head, true);
|
||||
}
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
write_protected |=
|
||||
kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn);
|
||||
|
||||
@ -1331,7 +1332,7 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
|
||||
bool flush = false;
|
||||
|
||||
while ((sptep = rmap_get_first(rmap_head, &iter))) {
|
||||
rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
|
||||
rmap_printk("spte %p %llx.\n", sptep, *sptep);
|
||||
|
||||
pte_list_remove(rmap_head, sptep);
|
||||
flush = true;
|
||||
@ -1363,7 +1364,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
|
||||
|
||||
restart:
|
||||
for_each_rmap_spte(rmap_head, &iter, sptep) {
|
||||
rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
|
||||
rmap_printk("spte %p %llx gfn %llx (%d)\n",
|
||||
sptep, *sptep, gfn, level);
|
||||
|
||||
need_flush = 1;
|
||||
@ -1456,16 +1457,17 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
|
||||
slot_rmap_walk_okay(_iter_); \
|
||||
slot_rmap_walk_next(_iter_))
|
||||
|
||||
static int kvm_handle_hva_range(struct kvm *kvm,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long data,
|
||||
int (*handler)(struct kvm *kvm,
|
||||
struct kvm_rmap_head *rmap_head,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn,
|
||||
int level,
|
||||
unsigned long data))
|
||||
static __always_inline int
|
||||
kvm_handle_hva_range(struct kvm *kvm,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long data,
|
||||
int (*handler)(struct kvm *kvm,
|
||||
struct kvm_rmap_head *rmap_head,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn,
|
||||
int level,
|
||||
unsigned long data))
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
@ -1521,7 +1523,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
|
||||
|
||||
r = kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
r |= kvm_tdp_mmu_zap_hva_range(kvm, start, end);
|
||||
|
||||
return r;
|
||||
@ -1533,7 +1535,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
|
||||
r = kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
r |= kvm_tdp_mmu_set_spte_hva(kvm, hva, &pte);
|
||||
|
||||
return r;
|
||||
@ -1588,7 +1590,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
int young = false;
|
||||
|
||||
young = kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
young |= kvm_tdp_mmu_age_hva_range(kvm, start, end);
|
||||
|
||||
return young;
|
||||
@ -1599,7 +1601,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
|
||||
int young = false;
|
||||
|
||||
young = kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
young |= kvm_tdp_mmu_test_age_hva(kvm, hva);
|
||||
|
||||
return young;
|
||||
@ -1723,13 +1725,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *sp, u64 *spte,
|
||||
const void *pte)
|
||||
{
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
#define KVM_PAGE_ARRAY_NR 16
|
||||
|
||||
struct kvm_mmu_pages {
|
||||
@ -2016,9 +2011,9 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
|
||||
flush |= kvm_sync_page(vcpu, sp, &invalid_list);
|
||||
mmu_pages_clear_parents(&parents);
|
||||
}
|
||||
if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) {
|
||||
if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
|
||||
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
|
||||
cond_resched_lock(&vcpu->kvm->mmu_lock);
|
||||
cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
|
||||
flush = false;
|
||||
}
|
||||
}
|
||||
@ -2417,7 +2412,7 @@ static unsigned long kvm_mmu_zap_oldest_mmu_pages(struct kvm *kvm,
|
||||
return 0;
|
||||
|
||||
restart:
|
||||
list_for_each_entry_safe(sp, tmp, &kvm->arch.active_mmu_pages, link) {
|
||||
list_for_each_entry_safe_reverse(sp, tmp, &kvm->arch.active_mmu_pages, link) {
|
||||
/*
|
||||
* Don't zap active root pages, the page itself can't be freed
|
||||
* and zapping it will just force vCPUs to realloc and reload.
|
||||
@ -2470,7 +2465,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
|
||||
{
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
|
||||
kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages -
|
||||
@ -2481,7 +2476,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
|
||||
|
||||
kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
||||
@ -2492,7 +2487,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
||||
|
||||
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
|
||||
r = 0;
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
|
||||
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
|
||||
sp->role.word);
|
||||
@ -2500,7 +2495,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
|
||||
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
|
||||
}
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
return r;
|
||||
}
|
||||
@ -3161,7 +3156,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
|
||||
sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK);
|
||||
|
||||
if (kvm_mmu_put_root(kvm, sp)) {
|
||||
if (sp->tdp_mmu_page)
|
||||
if (is_tdp_mmu_page(sp))
|
||||
kvm_tdp_mmu_free_root(kvm, sp);
|
||||
else if (sp->role.invalid)
|
||||
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
|
||||
@ -3192,7 +3187,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
|
||||
if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
|
||||
@ -3215,7 +3210,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
}
|
||||
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
|
||||
|
||||
@ -3236,16 +3231,16 @@ static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
|
||||
{
|
||||
struct kvm_mmu_page *sp;
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
if (make_mmu_pages_available(vcpu)) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
return INVALID_PAGE;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, gfn, gva, level, direct, ACC_ALL);
|
||||
++sp->root_count;
|
||||
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
return __pa(sp->spt);
|
||||
}
|
||||
|
||||
@ -3255,7 +3250,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
||||
hpa_t root;
|
||||
unsigned i;
|
||||
|
||||
if (vcpu->kvm->arch.tdp_mmu_enabled) {
|
||||
if (is_tdp_mmu_enabled(vcpu->kvm)) {
|
||||
root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
|
||||
|
||||
if (!VALID_PAGE(root))
|
||||
@ -3416,17 +3411,17 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
|
||||
!smp_load_acquire(&sp->unsync_children))
|
||||
return;
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
|
||||
|
||||
mmu_sync_children(vcpu, sp);
|
||||
|
||||
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -3440,7 +3435,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
|
||||
|
||||
@@ -3724,7 +3719,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
return r;

r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);

if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
read_lock(&vcpu->kvm->mmu_lock);
else
write_lock(&vcpu->kvm->mmu_lock);

if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
@@ -3739,7 +3739,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
prefault, is_tdp);

out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
read_unlock(&vcpu->kvm->mmu_lock);
else
write_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return r;
}
@ -3813,7 +3816,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
|
||||
context->gva_to_gpa = nonpaging_gva_to_gpa;
|
||||
context->sync_page = nonpaging_sync_page;
|
||||
context->invlpg = NULL;
|
||||
context->update_pte = nonpaging_update_pte;
|
||||
context->root_level = 0;
|
||||
context->shadow_root_level = PT32E_ROOT_LEVEL;
|
||||
context->direct_map = true;
|
||||
@ -3984,20 +3986,27 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu,
|
||||
static void
|
||||
__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
||||
struct rsvd_bits_validate *rsvd_check,
|
||||
int maxphyaddr, int level, bool nx, bool gbpages,
|
||||
u64 pa_bits_rsvd, int level, bool nx, bool gbpages,
|
||||
bool pse, bool amd)
|
||||
{
|
||||
u64 exb_bit_rsvd = 0;
|
||||
u64 gbpages_bit_rsvd = 0;
|
||||
u64 nonleaf_bit8_rsvd = 0;
|
||||
u64 high_bits_rsvd;
|
||||
|
||||
rsvd_check->bad_mt_xwr = 0;
|
||||
|
||||
if (!nx)
|
||||
exb_bit_rsvd = rsvd_bits(63, 63);
|
||||
if (!gbpages)
|
||||
gbpages_bit_rsvd = rsvd_bits(7, 7);
|
||||
|
||||
if (level == PT32E_ROOT_LEVEL)
|
||||
high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 62);
|
||||
else
|
||||
high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51);
|
||||
|
||||
/* Note, NX doesn't exist in PDPTEs, this is handled below. */
|
||||
if (!nx)
|
||||
high_bits_rsvd |= rsvd_bits(63, 63);
|
||||
|
||||
/*
|
||||
* Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
|
||||
* leaf entries) on AMD CPUs only.
|
||||
@ -4026,45 +4035,39 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
||||
rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
|
||||
break;
|
||||
case PT32E_ROOT_LEVEL:
|
||||
rsvd_check->rsvd_bits_mask[0][2] =
|
||||
rsvd_bits(maxphyaddr, 63) |
|
||||
rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */
|
||||
rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
|
||||
rsvd_bits(maxphyaddr, 62); /* PDE */
|
||||
rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
|
||||
rsvd_bits(maxphyaddr, 62); /* PTE */
|
||||
rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
|
||||
rsvd_bits(maxphyaddr, 62) |
|
||||
rsvd_bits(13, 20); /* large page */
|
||||
rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(63, 63) |
|
||||
high_bits_rsvd |
|
||||
rsvd_bits(5, 8) |
|
||||
rsvd_bits(1, 2); /* PDPTE */
|
||||
rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd; /* PDE */
|
||||
rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; /* PTE */
|
||||
rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd |
|
||||
rsvd_bits(13, 20); /* large page */
|
||||
rsvd_check->rsvd_bits_mask[1][0] =
|
||||
rsvd_check->rsvd_bits_mask[0][0];
|
||||
break;
|
||||
case PT64_ROOT_5LEVEL:
|
||||
rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd |
|
||||
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
|
||||
rsvd_bits(maxphyaddr, 51);
|
||||
rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd |
|
||||
nonleaf_bit8_rsvd |
|
||||
rsvd_bits(7, 7);
|
||||
rsvd_check->rsvd_bits_mask[1][4] =
|
||||
rsvd_check->rsvd_bits_mask[0][4];
|
||||
fallthrough;
|
||||
case PT64_ROOT_4LEVEL:
|
||||
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
|
||||
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
|
||||
rsvd_bits(maxphyaddr, 51);
|
||||
rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
|
||||
gbpages_bit_rsvd |
|
||||
rsvd_bits(maxphyaddr, 51);
|
||||
rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
|
||||
rsvd_bits(maxphyaddr, 51);
|
||||
rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
|
||||
rsvd_bits(maxphyaddr, 51);
|
||||
rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd |
|
||||
nonleaf_bit8_rsvd |
|
||||
rsvd_bits(7, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd |
|
||||
gbpages_bit_rsvd;
|
||||
rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd;
|
||||
rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd;
|
||||
rsvd_check->rsvd_bits_mask[1][3] =
|
||||
rsvd_check->rsvd_bits_mask[0][3];
|
||||
rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
|
||||
gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
|
||||
rsvd_bits(13, 29);
|
||||
rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
|
||||
rsvd_bits(maxphyaddr, 51) |
|
||||
rsvd_bits(13, 20); /* large page */
|
||||
rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd |
|
||||
gbpages_bit_rsvd |
|
||||
rsvd_bits(13, 29);
|
||||
rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd |
|
||||
rsvd_bits(13, 20); /* large page */
|
||||
rsvd_check->rsvd_bits_mask[1][0] =
|
||||
rsvd_check->rsvd_bits_mask[0][0];
|
||||
break;
|
||||
@ -4075,8 +4078,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *context)
|
||||
{
|
||||
__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
|
||||
cpuid_maxphyaddr(vcpu), context->root_level,
|
||||
context->nx,
|
||||
vcpu->arch.reserved_gpa_bits,
|
||||
context->root_level, context->nx,
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
|
||||
is_pse(vcpu),
|
||||
guest_cpuid_is_amd_or_hygon(vcpu));
|
||||
@ -4084,27 +4087,22 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
||||
|
||||
static void
|
||||
__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
|
||||
int maxphyaddr, bool execonly)
|
||||
u64 pa_bits_rsvd, bool execonly)
|
||||
{
|
||||
u64 high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51);
|
||||
u64 bad_mt_xwr;
|
||||
|
||||
rsvd_check->rsvd_bits_mask[0][4] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][3] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][2] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
|
||||
rsvd_check->rsvd_bits_mask[0][1] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
|
||||
rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
|
||||
rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | rsvd_bits(3, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | rsvd_bits(3, 7);
|
||||
rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6);
|
||||
rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6);
|
||||
rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd;
|
||||
|
||||
/* large page */
|
||||
rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
|
||||
rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
|
||||
rsvd_check->rsvd_bits_mask[1][2] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
|
||||
rsvd_check->rsvd_bits_mask[1][1] =
|
||||
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
|
||||
rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29);
|
||||
rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20);
|
||||
rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
|
||||
|
||||
bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */
|
||||
@ -4123,7 +4121,12 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *context, bool execonly)
|
||||
{
|
||||
__reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
|
||||
cpuid_maxphyaddr(vcpu), execonly);
|
||||
vcpu->arch.reserved_gpa_bits, execonly);
|
||||
}
|
||||
|
||||
static inline u64 reserved_hpa_bits(void)
|
||||
{
|
||||
return rsvd_bits(shadow_phys_bits, 63);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4145,7 +4148,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
|
||||
*/
|
||||
shadow_zero_check = &context->shadow_zero_check;
|
||||
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
|
||||
shadow_phys_bits,
|
||||
reserved_hpa_bits(),
|
||||
context->shadow_root_level, uses_nx,
|
||||
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
|
||||
is_pse(vcpu), true);
|
||||
@ -4182,14 +4185,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
|
||||
|
||||
if (boot_cpu_is_amd())
|
||||
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
|
||||
shadow_phys_bits,
|
||||
reserved_hpa_bits(),
|
||||
context->shadow_root_level, false,
|
||||
boot_cpu_has(X86_FEATURE_GBPAGES),
|
||||
true, true);
|
||||
else
|
||||
__reset_rsvds_bits_mask_ept(shadow_zero_check,
|
||||
shadow_phys_bits,
|
||||
false);
|
||||
reserved_hpa_bits(), false);
|
||||
|
||||
if (!shadow_me_mask)
|
||||
return;
|
||||
@ -4209,7 +4211,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu *context, bool execonly)
|
||||
{
|
||||
__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
|
||||
shadow_phys_bits, execonly);
|
||||
reserved_hpa_bits(), execonly);
|
||||
}
|
||||
|
||||
#define BYTE_MASK(access) \
|
||||
@ -4395,7 +4397,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
|
||||
context->gva_to_gpa = paging64_gva_to_gpa;
|
||||
context->sync_page = paging64_sync_page;
|
||||
context->invlpg = paging64_invlpg;
|
||||
context->update_pte = paging64_update_pte;
|
||||
context->shadow_root_level = level;
|
||||
context->direct_map = false;
|
||||
}
|
||||
@ -4424,7 +4425,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
|
||||
context->gva_to_gpa = paging32_gva_to_gpa;
|
||||
context->sync_page = paging32_sync_page;
|
||||
context->invlpg = paging32_invlpg;
|
||||
context->update_pte = paging32_update_pte;
|
||||
context->shadow_root_level = PT32E_ROOT_LEVEL;
|
||||
context->direct_map = false;
|
||||
}
|
||||
@ -4506,7 +4506,6 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
||||
context->page_fault = kvm_tdp_page_fault;
|
||||
context->sync_page = nonpaging_sync_page;
|
||||
context->invlpg = NULL;
|
||||
context->update_pte = nonpaging_update_pte;
|
||||
context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu);
|
||||
context->direct_map = true;
|
||||
context->get_guest_pgd = get_cr3;
|
||||
@ -4678,7 +4677,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
|
||||
context->gva_to_gpa = ept_gva_to_gpa;
|
||||
context->sync_page = ept_sync_page;
|
||||
context->invlpg = ept_invlpg;
|
||||
context->update_pte = ept_update_pte;
|
||||
context->root_level = level;
|
||||
context->direct_map = false;
|
||||
context->mmu_role.as_u64 = new_role.as_u64;
|
||||
@ -4811,7 +4809,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
|
||||
if (r)
|
||||
goto out;
|
||||
kvm_mmu_load_pgd(vcpu);
|
||||
kvm_x86_ops.tlb_flush_current(vcpu);
|
||||
static_call(kvm_x86_tlb_flush_current)(vcpu);
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
@ -4826,19 +4824,6 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
|
||||
|
||||
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
|
||||
struct kvm_mmu_page *sp, u64 *spte,
|
||||
const void *new)
|
||||
{
|
||||
if (sp->role.level != PG_LEVEL_4K) {
|
||||
++vcpu->kvm->stat.mmu_pde_zapped;
|
||||
return;
|
||||
}
|
||||
|
||||
++vcpu->kvm->stat.mmu_pte_updated;
|
||||
vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
|
||||
}
|
||||
|
||||
static bool need_remote_flush(u64 old, u64 new)
|
||||
{
|
||||
if (!is_shadow_present_pte(old))
|
||||
@ -4954,22 +4939,6 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
|
||||
return spte;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore various flags when determining if a SPTE can be immediately
|
||||
* overwritten for the current MMU.
|
||||
* - level: explicitly checked in mmu_pte_write_new_pte(), and will never
|
||||
* match the current MMU role, as MMU's level tracks the root level.
|
||||
* - access: updated based on the new guest PTE
|
||||
* - quadrant: handled by get_written_sptes()
|
||||
* - invalid: always false (loop only walks valid shadow pages)
|
||||
*/
|
||||
static const union kvm_mmu_page_role role_ign = {
|
||||
.level = 0xf,
|
||||
.access = 0x7,
|
||||
.quadrant = 0x3,
|
||||
.invalid = 0x1,
|
||||
};
|
||||
|
||||
static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
const u8 *new, int bytes,
|
||||
struct kvm_page_track_notifier_node *node)
|
||||
@ -4999,7 +4968,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
*/
|
||||
mmu_topup_memory_caches(vcpu, true);
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
|
||||
gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
|
||||
|
||||
@ -5020,14 +4989,10 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
|
||||
local_flush = true;
|
||||
while (npte--) {
|
||||
u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
|
||||
|
||||
entry = *spte;
|
||||
mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
|
||||
if (gentry &&
|
||||
!((sp->role.word ^ base_role) & ~role_ign.word) &&
|
||||
rmap_can_add(vcpu))
|
||||
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
|
||||
if (gentry && sp->role.level != PG_LEVEL_4K)
|
||||
++vcpu->kvm->stat.mmu_pde_zapped;
|
||||
if (need_remote_flush(entry, *spte))
|
||||
remote_flush = true;
|
||||
++spte;
|
||||
@ -5035,7 +5000,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
}
|
||||
kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
|
||||
kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
}
|
||||
|
||||
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
|
||||
@ -5125,7 +5090,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
if (is_noncanonical_address(gva, vcpu))
|
||||
return;
|
||||
|
||||
kvm_x86_ops.tlb_flush_gva(vcpu, gva);
|
||||
static_call(kvm_x86_tlb_flush_gva)(vcpu, gva);
|
||||
}
|
||||
|
||||
if (!mmu->invlpg)
|
||||
@ -5182,7 +5147,7 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
|
||||
}
|
||||
|
||||
if (tlb_flush)
|
||||
kvm_x86_ops.tlb_flush_gva(vcpu, gva);
|
||||
static_call(kvm_x86_tlb_flush_gva)(vcpu, gva);
|
||||
|
||||
++vcpu->stat.invlpg;
|
||||
|
||||
@ -5233,14 +5198,14 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
if (iterator.rmap)
|
||||
flush |= fn(kvm, iterator.rmap);
|
||||
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
|
||||
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
|
||||
if (flush && lock_flush_tlb) {
|
||||
kvm_flush_remote_tlbs_with_address(kvm,
|
||||
start_gfn,
|
||||
iterator.gfn - start_gfn + 1);
|
||||
flush = false;
|
||||
}
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
cond_resched_rwlock_write(&kvm->mmu_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5390,7 +5355,7 @@ restart:
|
||||
* be in active use by the guest.
|
||||
*/
|
||||
if (batch >= BATCH_ZAP_PAGES &&
|
||||
cond_resched_lock(&kvm->mmu_lock)) {
|
||||
cond_resched_rwlock_write(&kvm->mmu_lock)) {
|
||||
batch = 0;
|
||||
goto restart;
|
||||
}
|
||||
@ -5423,7 +5388,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
|
||||
{
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
trace_kvm_mmu_zap_all_fast(kvm);
|
||||
|
||||
/*
|
||||
@ -5447,10 +5412,10 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
|
||||
|
||||
kvm_zap_obsolete_pages(kvm);
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
kvm_tdp_mmu_zap_all(kvm);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
|
||||
@ -5492,7 +5457,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
|
||||
int i;
|
||||
bool flush;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
kvm_for_each_memslot(memslot, slots) {
|
||||
@ -5510,13 +5475,13 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
|
||||
}
|
||||
}
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled) {
|
||||
if (is_tdp_mmu_enabled(kvm)) {
|
||||
flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end);
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
static bool slot_rmap_write_protect(struct kvm *kvm,
|
||||
@ -5531,12 +5496,12 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
{
|
||||
bool flush;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
|
||||
start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_4K);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
* We can flush all the TLBs out of the mmu lock without TLB
|
||||
@ -5596,13 +5561,13 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
/* FIXME: const-ify all uses of struct kvm_memory_slot. */
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
|
||||
kvm_mmu_zap_collapsible_spte, true);
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
@ -5625,11 +5590,11 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
{
|
||||
bool flush;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
* It's also safe to flush TLBs out of mmu lock here as currently this
|
||||
@ -5647,12 +5612,12 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
|
||||
{
|
||||
bool flush;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
|
||||
false);
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
if (flush)
|
||||
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
|
||||
@ -5664,11 +5629,11 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
|
||||
{
|
||||
bool flush;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
if (flush)
|
||||
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
|
||||
@ -5681,23 +5646,23 @@ void kvm_mmu_zap_all(struct kvm *kvm)
|
||||
LIST_HEAD(invalid_list);
|
||||
int ign;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
restart:
|
||||
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
|
||||
if (WARN_ON(sp->role.invalid))
|
||||
continue;
|
||||
if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
|
||||
goto restart;
|
||||
if (cond_resched_lock(&kvm->mmu_lock))
|
||||
if (cond_resched_rwlock_write(&kvm->mmu_lock))
|
||||
goto restart;
|
||||
}
|
||||
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
|
||||
if (kvm->arch.tdp_mmu_enabled)
|
||||
if (is_tdp_mmu_enabled(kvm))
|
||||
kvm_tdp_mmu_zap_all(kvm);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
|
||||
@ -5757,7 +5722,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
|
||||
continue;
|
||||
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
if (kvm_has_zapped_obsolete_pages(kvm)) {
|
||||
kvm_mmu_commit_zap_page(kvm,
|
||||
@ -5768,7 +5733,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
|
||||
freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
|
||||
|
||||
unlock:
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
/*
|
||||
@ -5988,7 +5953,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
|
||||
ulong to_zap;
|
||||
|
||||
rcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
|
||||
to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
|
||||
@ -6005,22 +5970,22 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
|
||||
struct kvm_mmu_page,
|
||||
lpage_disallowed_link);
|
||||
WARN_ON_ONCE(!sp->lpage_disallowed);
|
||||
if (sp->tdp_mmu_page)
|
||||
if (is_tdp_mmu_page(sp)) {
|
||||
kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
|
||||
sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
|
||||
else {
|
||||
} else {
|
||||
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
|
||||
WARN_ON_ONCE(sp->lpage_disallowed);
|
||||
}
|
||||
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
|
||||
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
cond_resched_rwlock_write(&kvm->mmu_lock);
|
||||
}
|
||||
}
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
srcu_read_unlock(&kvm->srcu, rcu_idx);
|
||||
}
|
||||
|
||||
|
@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
static bool mmu_audit;
|
||||
static struct static_key mmu_audit_key;
|
||||
static DEFINE_STATIC_KEY_FALSE(mmu_audit_key);
|
||||
|
||||
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
|
||||
{
|
||||
@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
|
||||
|
||||
static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
|
||||
{
|
||||
if (static_key_false((&mmu_audit_key)))
|
||||
if (static_branch_unlikely((&mmu_audit_key)))
|
||||
__kvm_mmu_audit(vcpu, point);
|
||||
}
|
||||
|
||||
@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
|
||||
if (mmu_audit)
|
||||
return;
|
||||
|
||||
static_key_slow_inc(&mmu_audit_key);
|
||||
static_branch_inc(&mmu_audit_key);
|
||||
mmu_audit = true;
|
||||
}
|
||||
|
||||
@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
|
||||
if (!mmu_audit)
|
||||
return;
|
||||
|
||||
static_key_slow_dec(&mmu_audit_key);
|
||||
static_branch_dec(&mmu_audit_key);
|
||||
mmu_audit = false;
|
||||
}
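
The hunks above move mmu_audit from the deprecated static_key calls to the static-branch API. A minimal, self-contained sketch of that API, with a made-up my_feature_key and do_slow_audit() that are not from the patch:

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);

static void my_feature_enable(void)
{
	static_branch_inc(&my_feature_key);	/* patch the branch to taken */
}

static void my_feature_disable(void)
{
	static_branch_dec(&my_feature_key);	/* restore the not-taken branch */
}

static void my_hot_path(void)
{
	if (static_branch_unlikely(&my_feature_key))
		do_slow_audit();		/* hypothetical slow-path helper */
}
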
|
@ -12,7 +12,7 @@
|
||||
extern bool dbg;
|
||||
|
||||
#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
|
||||
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
|
||||
#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
|
||||
#define MMU_WARN_ON(x) WARN_ON(x)
|
||||
#else
|
||||
#define pgprintk(x...) do { } while (0)
|
||||
@ -56,7 +56,12 @@ struct kvm_mmu_page {
|
||||
/* Number of writes since the last time traversal visited this page. */
|
||||
atomic_t write_flooding_count;
|
||||
|
||||
#ifdef CONFIG_X86_64
	bool tdp_mmu_page;

	/* Used for freeing the page asynchronously if it is a TDP MMU page. */
	struct rcu_head rcu_head;
#endif
};
|
||||
|
||||
extern struct kmem_cache *mmu_page_header_cache;
|
||||
|
@ -184,9 +184,9 @@ kvm_page_track_register_notifier(struct kvm *kvm,
|
||||
|
||||
head = &kvm->arch.track_notifier_head;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
|
||||
|
||||
@ -202,9 +202,9 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
|
||||
|
||||
head = &kvm->arch.track_notifier_head;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
hlist_del_rcu(&n->node);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
synchronize_srcu(&head->track_srcu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
|
||||
|
@ -868,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
|
||||
}
|
||||
|
||||
r = RET_PF_RETRY;
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
|
||||
@ -881,7 +881,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
|
||||
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
return r;
|
||||
}
|
||||
@ -919,7 +919,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
write_lock(&vcpu->kvm->mmu_lock);
|
||||
for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
|
||||
level = iterator.level;
|
||||
sptep = iterator.sptep;
|
||||
@ -954,7 +954,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
|
||||
if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
|
||||
break;
|
||||
}
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
write_unlock(&vcpu->kvm->mmu_lock);
|
||||
}
|
||||
|
||||
/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
|
||||
|
@ -120,7 +120,7 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
|
||||
if (level > PG_LEVEL_4K)
|
||||
spte |= PT_PAGE_SIZE_MASK;
|
||||
if (tdp_enabled)
|
||||
spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn,
|
||||
spte |= static_call(kvm_x86_get_mt_mask)(vcpu, gfn,
|
||||
kvm_is_mmio_pfn(pfn));
|
||||
|
||||
if (host_writable)
|
||||
|
@ -130,6 +130,25 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
|
||||
PT64_EPT_EXECUTABLE_MASK)
|
||||
#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT
|
||||
|
||||
/*
 * If a thread running without exclusive control of the MMU lock must perform a
 * multi-part operation on an SPTE, it can set the SPTE to REMOVED_SPTE as a
 * non-present intermediate value. Other threads which encounter this value
 * should not modify the SPTE.
 *
 * This constant works because it is considered non-present on both AMD and
 * Intel CPUs and does not create a L1TF vulnerability because the pfn section
 * is zeroed out.
 *
 * Only used by the TDP MMU.
 */
#define REMOVED_SPTE (1ull << 59)

static inline bool is_removed_spte(u64 spte)
{
	return spte == REMOVED_SPTE;
}
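
A minimal sketch, not part of the patch, of how a thread that does not hold the MMU lock for write is expected to treat REMOVED_SPTE: back off when the SPTE is frozen, and only install a new value with a compare-and-exchange so a racing freeze cannot be clobbered.

static bool try_set_spte_locklessly(u64 *sptep, u64 old_spte, u64 new_spte)
{
	/* Another thread froze this SPTE for a multi-part update; leave it alone. */
	if (is_removed_spte(old_spte))
		return false;

	/* Install the new value only if the SPTE has not changed in the meantime. */
	return cmpxchg64(sptep, old_spte, new_spte) == old_spte;
}
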
/*
|
||||
* In some cases, we need to preserve the GFN of a non-present or reserved
|
||||
* SPTE when we usurp the upper five bits of the physical address space to
|
||||
@ -185,23 +204,19 @@ static inline bool is_access_track_spte(u64 spte)
|
||||
return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0;
|
||||
}
|
||||
|
||||
static inline int is_shadow_present_pte(u64 pte)
|
||||
static inline bool is_shadow_present_pte(u64 pte)
|
||||
{
|
||||
return (pte != 0) && !is_mmio_spte(pte);
|
||||
return (pte != 0) && !is_mmio_spte(pte) && !is_removed_spte(pte);
|
||||
}
|
||||
|
||||
static inline int is_large_pte(u64 pte)
|
||||
static inline bool is_large_pte(u64 pte)
|
||||
{
|
||||
return pte & PT_PAGE_SIZE_MASK;
|
||||
}
|
||||
|
||||
static inline int is_last_spte(u64 pte, int level)
|
||||
static inline bool is_last_spte(u64 pte, int level)
|
||||
{
|
||||
if (level == PG_LEVEL_4K)
|
||||
return 1;
|
||||
if (is_large_pte(pte))
|
||||
return 1;
|
||||
return 0;
|
||||
return (level == PG_LEVEL_4K) || is_large_pte(pte);
|
||||
}
|
||||
|
||||
static inline bool is_executable_pte(u64 spte)
|
||||
|
@ -12,7 +12,7 @@ static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
|
||||
{
|
||||
iter->sptep = iter->pt_path[iter->level - 1] +
|
||||
SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
|
||||
iter->old_spte = READ_ONCE(*iter->sptep);
|
||||
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
|
||||
}
|
||||
|
||||
static gfn_t round_gfn_for_level(gfn_t gfn, int level)
|
||||
@ -22,21 +22,22 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
|
||||
|
||||
/*
|
||||
* Sets a TDP iterator to walk a pre-order traversal of the paging structure
|
||||
* rooted at root_pt, starting with the walk to translate goal_gfn.
|
||||
* rooted at root_pt, starting with the walk to translate next_last_level_gfn.
|
||||
*/
|
||||
void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
|
||||
int min_level, gfn_t goal_gfn)
|
||||
int min_level, gfn_t next_last_level_gfn)
|
||||
{
|
||||
WARN_ON(root_level < 1);
|
||||
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
|
||||
|
||||
iter->goal_gfn = goal_gfn;
|
||||
iter->next_last_level_gfn = next_last_level_gfn;
|
||||
iter->yielded_gfn = iter->next_last_level_gfn;
|
||||
iter->root_level = root_level;
|
||||
iter->min_level = min_level;
|
||||
iter->level = root_level;
|
||||
iter->pt_path[iter->level - 1] = root_pt;
|
||||
iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
|
||||
|
||||
iter->gfn = round_gfn_for_level(iter->goal_gfn, iter->level);
|
||||
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
|
||||
tdp_iter_refresh_sptep(iter);
|
||||
|
||||
iter->valid = true;
|
||||
@ -47,7 +48,7 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
|
||||
* address of the child page table referenced by the SPTE. Returns null if
|
||||
* there is no such entry.
|
||||
*/
|
||||
u64 *spte_to_child_pt(u64 spte, int level)
|
||||
tdp_ptep_t spte_to_child_pt(u64 spte, int level)
|
||||
{
|
||||
/*
|
||||
* There's no child entry if this entry isn't present or is a
|
||||
@ -56,7 +57,7 @@ u64 *spte_to_child_pt(u64 spte, int level)
|
||||
if (!is_shadow_present_pte(spte) || is_last_spte(spte, level))
|
||||
return NULL;
|
||||
|
||||
return __va(spte_to_pfn(spte) << PAGE_SHIFT);
|
||||
return (tdp_ptep_t)__va(spte_to_pfn(spte) << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -65,7 +66,7 @@ u64 *spte_to_child_pt(u64 spte, int level)
|
||||
*/
|
||||
static bool try_step_down(struct tdp_iter *iter)
|
||||
{
|
||||
u64 *child_pt;
|
||||
tdp_ptep_t child_pt;
|
||||
|
||||
if (iter->level == iter->min_level)
|
||||
return false;
|
||||
@ -74,7 +75,7 @@ static bool try_step_down(struct tdp_iter *iter)
|
||||
* Reread the SPTE before stepping down to avoid traversing into page
|
||||
* tables that are no longer linked from this entry.
|
||||
*/
|
||||
iter->old_spte = READ_ONCE(*iter->sptep);
|
||||
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
|
||||
|
||||
child_pt = spte_to_child_pt(iter->old_spte, iter->level);
|
||||
if (!child_pt)
|
||||
@ -82,7 +83,7 @@ static bool try_step_down(struct tdp_iter *iter)
|
||||
|
||||
iter->level--;
|
||||
iter->pt_path[iter->level - 1] = child_pt;
|
||||
iter->gfn = round_gfn_for_level(iter->goal_gfn, iter->level);
|
||||
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
|
||||
tdp_iter_refresh_sptep(iter);
|
||||
|
||||
return true;
|
||||
@ -106,9 +107,9 @@ static bool try_step_side(struct tdp_iter *iter)
|
||||
return false;
|
||||
|
||||
iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
|
||||
iter->goal_gfn = iter->gfn;
|
||||
iter->next_last_level_gfn = iter->gfn;
|
||||
iter->sptep++;
|
||||
iter->old_spte = READ_ONCE(*iter->sptep);
|
||||
iter->old_spte = READ_ONCE(*rcu_dereference(iter->sptep));
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -158,24 +159,7 @@ void tdp_iter_next(struct tdp_iter *iter)
|
||||
iter->valid = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Restart the walk over the paging structure from the root, starting from the
|
||||
* highest gfn the iterator had previously reached. Assumes that the entire
|
||||
* paging structure, except the root page, may have been completely torn down
|
||||
* and rebuilt.
|
||||
*/
|
||||
void tdp_iter_refresh_walk(struct tdp_iter *iter)
|
||||
{
|
||||
gfn_t goal_gfn = iter->goal_gfn;
|
||||
|
||||
if (iter->gfn > goal_gfn)
|
||||
goal_gfn = iter->gfn;
|
||||
|
||||
tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
|
||||
iter->root_level, iter->min_level, goal_gfn);
|
||||
}
|
||||
|
||||
u64 *tdp_iter_root_pt(struct tdp_iter *iter)
|
||||
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
|
||||
{
|
||||
return iter->pt_path[iter->root_level - 1];
|
||||
}
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#include "mmu.h"
|
||||
|
||||
typedef u64 __rcu *tdp_ptep_t;
|
||||
|
||||
/*
|
||||
* A TDP iterator performs a pre-order walk over a TDP paging structure.
|
||||
*/
|
||||
@ -15,11 +17,17 @@ struct tdp_iter {
|
||||
* The iterator will traverse the paging structure towards the mapping
|
||||
* for this GFN.
|
||||
*/
|
||||
gfn_t goal_gfn;
|
||||
gfn_t next_last_level_gfn;
|
||||
/*
|
||||
* The next_last_level_gfn at the time when the thread last
|
||||
* yielded. Only yielding when the next_last_level_gfn !=
|
||||
* yielded_gfn helps ensure forward progress.
|
||||
*/
|
||||
gfn_t yielded_gfn;
|
||||
/* Pointers to the page tables traversed to reach the current SPTE */
|
||||
u64 *pt_path[PT64_ROOT_MAX_LEVEL];
|
||||
tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
|
||||
/* A pointer to the current SPTE */
|
||||
u64 *sptep;
|
||||
tdp_ptep_t sptep;
|
||||
/* The lowest GFN mapped by the current SPTE */
|
||||
gfn_t gfn;
|
||||
/* The level of the root page given to the iterator */
|
||||
@ -49,12 +57,11 @@ struct tdp_iter {
|
||||
#define for_each_tdp_pte(iter, root, root_level, start, end) \
|
||||
for_each_tdp_pte_min_level(iter, root, root_level, PG_LEVEL_4K, start, end)
|
||||
|
||||
u64 *spte_to_child_pt(u64 pte, int level);
|
||||
tdp_ptep_t spte_to_child_pt(u64 pte, int level);
|
||||
|
||||
void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
|
||||
int min_level, gfn_t goal_gfn);
|
||||
int min_level, gfn_t next_last_level_gfn);
|
||||
void tdp_iter_next(struct tdp_iter *iter);
|
||||
void tdp_iter_refresh_walk(struct tdp_iter *iter);
|
||||
u64 *tdp_iter_root_pt(struct tdp_iter *iter);
|
||||
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter);
|
||||
|
||||
#endif /* __KVM_X86_MMU_TDP_ITER_H */
|
||||
|
@ -7,32 +7,23 @@
|
||||
#include "tdp_mmu.h"
|
||||
#include "spte.h"
|
||||
|
||||
#include <asm/cmpxchg.h>
|
||||
#include <trace/events/kvm.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static bool __read_mostly tdp_mmu_enabled = false;
|
||||
module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
|
||||
#endif
|
||||
|
||||
static bool is_tdp_mmu_enabled(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
|
||||
#else
|
||||
return false;
|
||||
#endif /* CONFIG_X86_64 */
|
||||
}
|
||||
|
||||
/* Initializes the TDP MMU for the VM, if enabled. */
|
||||
void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
|
||||
{
|
||||
if (!is_tdp_mmu_enabled())
|
||||
if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
|
||||
return;
|
||||
|
||||
/* This should not be changed for the lifetime of the VM. */
|
||||
kvm->arch.tdp_mmu_enabled = true;
|
||||
|
||||
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
|
||||
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
|
||||
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
|
||||
}
|
||||
|
||||
@ -42,6 +33,12 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
|
||||
return;
|
||||
|
||||
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
|
||||
|
||||
/*
|
||||
* Ensure that all the outstanding RCU callbacks to free shadow pages
|
||||
* can run before the VM is torn down.
|
||||
*/
|
||||
rcu_barrier();
|
||||
}
|
||||
|
||||
static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
|
||||
@ -53,7 +50,7 @@ static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
|
||||
static inline bool tdp_mmu_next_root_valid(struct kvm *kvm,
|
||||
struct kvm_mmu_page *root)
|
||||
{
|
||||
lockdep_assert_held(&kvm->mmu_lock);
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link))
|
||||
return false;
|
||||
@ -88,22 +85,6 @@ static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
|
||||
#define for_each_tdp_mmu_root(_kvm, _root) \
|
||||
list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
|
||||
|
||||
bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
|
||||
{
|
||||
struct kvm_mmu_page *sp;
|
||||
|
||||
if (!kvm->arch.tdp_mmu_enabled)
|
||||
return false;
|
||||
if (WARN_ON(!VALID_PAGE(hpa)))
|
||||
return false;
|
||||
|
||||
sp = to_shadow_page(hpa);
|
||||
if (WARN_ON(!sp))
|
||||
return false;
|
||||
|
||||
return sp->tdp_mmu_page && sp->root_count;
|
||||
}
|
||||
|
||||
static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
gfn_t start, gfn_t end, bool can_yield);
|
||||
|
||||
@ -111,7 +92,7 @@ void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
|
||||
{
|
||||
gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
|
||||
|
||||
lockdep_assert_held(&kvm->mmu_lock);
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
WARN_ON(root->root_count);
|
||||
WARN_ON(!root->tdp_mmu_page);
|
||||
@ -164,13 +145,13 @@ static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
|
||||
|
||||
role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
write_lock(&kvm->mmu_lock);
|
||||
|
||||
/* Check for an existing root before allocating a new one. */
|
||||
for_each_tdp_mmu_root(kvm, root) {
|
||||
if (root->role.word == role.word) {
|
||||
kvm_mmu_get_root(kvm, root);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
return root;
|
||||
}
|
||||
}
|
||||
@ -180,7 +161,7 @@ static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
|
||||
|
||||
list_add(&root->link, &kvm->arch.tdp_mmu_roots);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
return root;
|
||||
}
|
||||
@ -196,8 +177,31 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
|
||||
return __pa(root->spt);
|
||||
}
|
||||
|
||||
static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
|
||||
{
|
||||
free_page((unsigned long)sp->spt);
|
||||
kmem_cache_free(mmu_page_header_cache, sp);
|
||||
}
|
||||
|
||||
/*
 * This is called through call_rcu in order to free TDP page table memory
 * safely with respect to other kernel threads that may be operating on
 * the memory.
 * By only accessing TDP MMU page table memory in an RCU read critical
 * section, and freeing it after a grace period, lockless access to that
 * memory won't use it after it is freed.
 */
static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
{
	struct kvm_mmu_page *sp = container_of(head, struct kvm_mmu_page,
					       rcu_head);

	tdp_mmu_free_sp(sp);
}
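
The reader side implied by the comment above is a plain RCU read-side critical section around any lockless dereference of TDP page table memory. A sketch, assuming the tdp_ptep_t type defined earlier in this series:

static u64 read_spte_locklessly(tdp_ptep_t sptep)
{
	u64 spte;

	/*
	 * The RCU read lock guarantees that tdp_mmu_free_sp_rcu_callback()
	 * cannot free this page table until the critical section ends.
	 */
	rcu_read_lock();
	spte = READ_ONCE(*rcu_dereference(sptep));
	rcu_read_unlock();

	return spte;
}
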
static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
u64 old_spte, u64 new_spte, int level);
|
||||
u64 old_spte, u64 new_spte, int level,
|
||||
bool shared);
|
||||
|
||||
static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
|
||||
{
|
||||
@ -234,6 +238,128 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * tdp_mmu_link_page - Add a new page to the list of pages used by the TDP MMU
 *
 * @kvm: kvm instance
 * @sp: the new page
 * @shared: This operation may not be running under the exclusive use of
 *	    the MMU lock and the operation must synchronize with other
 *	    threads that might be adding or removing pages.
 * @account_nx: This page replaces a NX large page and should be marked for
 *		eventual reclaim.
 */
static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp,
			      bool shared, bool account_nx)
{
	if (shared)
		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
	else
		lockdep_assert_held_write(&kvm->mmu_lock);

	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
	if (account_nx)
		account_huge_nx_page(kvm, sp);

	if (shared)
		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}
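
A rough sketch of the two calling contexts the @shared parameter distinguishes; the lock acquisition here is simplified and the call would normally happen deeper inside the fault or zap path:

static void link_page_example(struct kvm *kvm, struct kvm_mmu_page *sp,
			      bool shared)
{
	if (shared) {
		/* e.g. the page fault path: MMU lock held for read */
		read_lock(&kvm->mmu_lock);
		tdp_mmu_link_page(kvm, sp, true, false);
		read_unlock(&kvm->mmu_lock);
	} else {
		/* e.g. a zap path: MMU lock held for write */
		write_lock(&kvm->mmu_lock);
		tdp_mmu_link_page(kvm, sp, false, false);
		write_unlock(&kvm->mmu_lock);
	}
}
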
/**
|
||||
* tdp_mmu_unlink_page - Remove page from the list of pages used by the TDP MMU
|
||||
*
|
||||
* @kvm: kvm instance
|
||||
* @sp: the page to be removed
|
||||
* @shared: This operation may not be running under the exclusive use of
|
||||
* the MMU lock and the operation must synchronize with other
|
||||
* threads that might be adding or removing pages.
|
||||
*/
|
||||
static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
|
||||
bool shared)
|
||||
{
|
||||
if (shared)
|
||||
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
else
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
list_del(&sp->link);
|
||||
if (sp->lpage_disallowed)
|
||||
unaccount_huge_nx_page(kvm, sp);
|
||||
|
||||
if (shared)
|
||||
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* handle_removed_tdp_mmu_page - handle a pt removed from the TDP structure
|
||||
*
|
||||
* @kvm: kvm instance
|
||||
* @pt: the page removed from the paging structure
|
||||
* @shared: This operation may not be running under the exclusive use
|
||||
* of the MMU lock and the operation must synchronize with other
|
||||
* threads that might be modifying SPTEs.
|
||||
*
|
||||
* Given a page table that has been removed from the TDP paging structure,
|
||||
* iterates through the page table to clear SPTEs and free child page tables.
|
||||
*/
|
||||
static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
|
||||
bool shared)
|
||||
{
|
||||
struct kvm_mmu_page *sp = sptep_to_sp(pt);
|
||||
int level = sp->role.level;
|
||||
gfn_t base_gfn = sp->gfn;
|
||||
u64 old_child_spte;
|
||||
u64 *sptep;
|
||||
gfn_t gfn;
|
||||
int i;
|
||||
|
||||
trace_kvm_mmu_prepare_zap_page(sp);
|
||||
|
||||
tdp_mmu_unlink_page(kvm, sp, shared);
|
||||
|
||||
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
|
||||
sptep = pt + i;
|
||||
gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));
|
||||
|
||||
if (shared) {
|
||||
/*
|
||||
* Set the SPTE to a nonpresent value that other
|
||||
* threads will not overwrite. If the SPTE was
|
||||
* already marked as removed then another thread
|
||||
* handling a page fault could overwrite it, so
|
||||
* set the SPTE until it is set from some other
|
||||
* value to the removed SPTE value.
|
||||
*/
|
||||
for (;;) {
|
||||
old_child_spte = xchg(sptep, REMOVED_SPTE);
|
||||
if (!is_removed_spte(old_child_spte))
|
||||
break;
|
||||
cpu_relax();
|
||||
}
|
||||
} else {
|
||||
old_child_spte = READ_ONCE(*sptep);
|
||||
|
||||
/*
|
||||
* Marking the SPTE as a removed SPTE is not
|
||||
* strictly necessary here as the MMU lock will
|
||||
* stop other threads from concurrently modifying
|
||||
* this SPTE. Using the removed SPTE value keeps
|
||||
* the two branches consistent and simplifies
|
||||
* the function.
|
||||
*/
|
||||
WRITE_ONCE(*sptep, REMOVED_SPTE);
|
||||
}
|
||||
handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
|
||||
old_child_spte, REMOVED_SPTE, level - 1,
|
||||
shared);
|
||||
}
|
||||
|
||||
kvm_flush_remote_tlbs_with_address(kvm, gfn,
|
||||
KVM_PAGES_PER_HPAGE(level));
|
||||
|
||||
call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* handle_changed_spte - handle bookkeeping associated with an SPTE change
|
||||
* @kvm: kvm instance
|
||||
@ -242,22 +368,22 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
* @old_spte: The value of the SPTE before the change
|
||||
* @new_spte: The value of the SPTE after the change
|
||||
* @level: the level of the PT the SPTE is part of in the paging structure
|
||||
* @shared: This operation may not be running under the exclusive use of
|
||||
* the MMU lock and the operation must synchronize with other
|
||||
* threads that might be modifying SPTEs.
|
||||
*
|
||||
* Handle bookkeeping that might result from the modification of a SPTE.
|
||||
* This function must be called for all TDP SPTE modifications.
|
||||
*/
|
||||
static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
u64 old_spte, u64 new_spte, int level)
|
||||
u64 old_spte, u64 new_spte, int level,
|
||||
bool shared)
|
||||
{
|
||||
bool was_present = is_shadow_present_pte(old_spte);
|
||||
bool is_present = is_shadow_present_pte(new_spte);
|
||||
bool was_leaf = was_present && is_last_spte(old_spte, level);
|
||||
bool is_leaf = is_present && is_last_spte(new_spte, level);
|
||||
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
|
||||
u64 *pt;
|
||||
struct kvm_mmu_page *sp;
|
||||
u64 old_child_spte;
|
||||
int i;
|
||||
|
||||
WARN_ON(level > PT64_ROOT_MAX_LEVEL);
|
||||
WARN_ON(level < PG_LEVEL_4K);
|
||||
@ -298,15 +424,19 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
*/
|
||||
if (!was_present && !is_present) {
|
||||
/*
|
||||
* If this change does not involve a MMIO SPTE, it is
|
||||
* unexpected. Log the change, though it should not impact the
|
||||
* guest since both the former and current SPTEs are nonpresent.
|
||||
* If this change does not involve a MMIO SPTE or removed SPTE,
|
||||
* it is unexpected. Log the change, though it should not
|
||||
* impact the guest since both the former and current SPTEs
|
||||
* are nonpresent.
|
||||
*/
|
||||
if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
|
||||
if (WARN_ON(!is_mmio_spte(old_spte) &&
|
||||
!is_mmio_spte(new_spte) &&
|
||||
!is_removed_spte(new_spte)))
|
||||
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
|
||||
"should not be replaced with another,\n"
|
||||
"different nonpresent SPTE, unless one or both\n"
|
||||
"are MMIO SPTEs.\n"
|
||||
"are MMIO SPTEs, or the new SPTE is\n"
|
||||
"a temporary removed SPTE.\n"
|
||||
"as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
|
||||
as_id, gfn, old_spte, new_spte, level);
|
||||
return;
|
||||
@ -321,54 +451,127 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
* Recursively handle child PTs if the change removed a subtree from
|
||||
* the paging structure.
|
||||
*/
|
||||
if (was_present && !was_leaf && (pfn_changed || !is_present)) {
|
||||
pt = spte_to_child_pt(old_spte, level);
|
||||
sp = sptep_to_sp(pt);
|
||||
|
||||
trace_kvm_mmu_prepare_zap_page(sp);
|
||||
|
||||
list_del(&sp->link);
|
||||
|
||||
if (sp->lpage_disallowed)
|
||||
unaccount_huge_nx_page(kvm, sp);
|
||||
|
||||
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
|
||||
old_child_spte = READ_ONCE(*(pt + i));
|
||||
WRITE_ONCE(*(pt + i), 0);
|
||||
handle_changed_spte(kvm, as_id,
|
||||
gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
|
||||
old_child_spte, 0, level - 1);
|
||||
}
|
||||
|
||||
kvm_flush_remote_tlbs_with_address(kvm, gfn,
|
||||
KVM_PAGES_PER_HPAGE(level));
|
||||
|
||||
free_page((unsigned long)pt);
|
||||
kmem_cache_free(mmu_page_header_cache, sp);
|
||||
}
|
||||
if (was_present && !was_leaf && (pfn_changed || !is_present))
|
||||
handle_removed_tdp_mmu_page(kvm,
|
||||
spte_to_child_pt(old_spte, level), shared);
|
||||
}
|
||||
|
||||
static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
||||
u64 old_spte, u64 new_spte, int level)
|
||||
u64 old_spte, u64 new_spte, int level,
|
||||
bool shared)
|
||||
{
|
||||
__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
|
||||
__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level,
|
||||
shared);
|
||||
handle_changed_spte_acc_track(old_spte, new_spte, level);
|
||||
handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
|
||||
new_spte, level);
|
||||
}
|
||||
|
||||
static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
|
||||
u64 new_spte, bool record_acc_track,
|
||||
bool record_dirty_log)
|
||||
/*
 * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
 * associated bookkeeping
 *
 * @kvm: kvm instance
 * @iter: a tdp_iter instance currently on the SPTE that should be set
 * @new_spte: The value the SPTE should be set to
 * Returns: true if the SPTE was set, false if it was not. If false is returned,
 *	    this function will have no side-effects.
 */
static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
					   struct tdp_iter *iter,
					   u64 new_spte)
{
|
||||
u64 *root_pt = tdp_iter_root_pt(iter);
|
||||
struct kvm_mmu_page *root = sptep_to_sp(root_pt);
|
||||
int as_id = kvm_mmu_page_as_id(root);
|
||||
|
||||
WRITE_ONCE(*iter->sptep, new_spte);
|
||||
lockdep_assert_held_read(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
* Do not change removed SPTEs. Only the thread that froze the SPTE
|
||||
* may modify it.
|
||||
*/
|
||||
if (iter->old_spte == REMOVED_SPTE)
|
||||
return false;
|
||||
|
||||
if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
|
||||
new_spte) != iter->old_spte)
|
||||
return false;
|
||||
|
||||
handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
|
||||
iter->level, true);
|
||||
|
||||
return true;
|
||||
}
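
Callers running under the MMU lock in read mode have to be prepared for the cmpxchg above to lose a race; the pattern used elsewhere in this series is to bail out and retry the fault, roughly as in this sketch (install_spte_example is a made-up name):

static int install_spte_example(struct kvm *kvm, struct tdp_iter *iter,
				u64 new_spte)
{
	lockdep_assert_held_read(&kvm->mmu_lock);

	/* Another vCPU changed (or froze) the SPTE first; redo the fault. */
	if (!tdp_mmu_set_spte_atomic(kvm, iter, new_spte))
		return RET_PF_RETRY;

	return RET_PF_FIXED;
}
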
static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
|
||||
struct tdp_iter *iter)
|
||||
{
|
||||
/*
|
||||
* Freeze the SPTE by setting it to a special,
|
||||
* non-present value. This will stop other threads from
|
||||
* immediately installing a present entry in its place
|
||||
* before the TLBs are flushed.
|
||||
*/
|
||||
if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
|
||||
return false;
|
||||
|
||||
kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
|
||||
KVM_PAGES_PER_HPAGE(iter->level));
|
||||
|
||||
/*
 * No other thread can overwrite the removed SPTE as they
 * must either wait on the MMU lock or use
 * tdp_mmu_set_spte_atomic which will not overwrite the
 * special removed SPTE value. No bookkeeping is needed
 * here since the SPTE is going from non-present
 * to non-present.
 */
|
||||
WRITE_ONCE(*iter->sptep, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
|
||||
* @kvm: kvm instance
|
||||
* @iter: a tdp_iter instance currently on the SPTE that should be set
|
||||
* @new_spte: The value the SPTE should be set to
|
||||
* @record_acc_track: Notify the MM subsystem of changes to the accessed state
|
||||
* of the page. Should be set unless handling an MMU
|
||||
* notifier for access tracking. Leaving record_acc_track
|
||||
* unset in that case prevents page accesses from being
|
||||
* double counted.
|
||||
* @record_dirty_log: Record the page as dirty in the dirty bitmap if
|
||||
* appropriate for the change being made. Should be set
|
||||
* unless performing certain dirty logging operations.
|
||||
* Leaving record_dirty_log unset in that case prevents page
|
||||
* writes from being double counted.
|
||||
*/
|
||||
static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
|
||||
u64 new_spte, bool record_acc_track,
|
||||
bool record_dirty_log)
|
||||
{
|
||||
tdp_ptep_t root_pt = tdp_iter_root_pt(iter);
|
||||
struct kvm_mmu_page *root = sptep_to_sp(root_pt);
|
||||
int as_id = kvm_mmu_page_as_id(root);
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
* No thread should be using this function to set SPTEs to the
|
||||
* temporary removed SPTE value.
|
||||
* If operating under the MMU lock in read mode, tdp_mmu_set_spte_atomic
|
||||
* should be used. If operating under the MMU lock in write mode, the
|
||||
* use of the removed SPTE should not be necessary.
|
||||
*/
|
||||
WARN_ON(iter->old_spte == REMOVED_SPTE);
|
||||
|
||||
WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
|
||||
|
||||
__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
|
||||
iter->level);
|
||||
iter->level, false);
|
||||
if (record_acc_track)
|
||||
handle_changed_spte_acc_track(iter->old_spte, new_spte,
|
||||
iter->level);
|
||||
@ -413,27 +616,46 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
|
||||
_mmu->shadow_root_level, _start, _end)
|
||||
|
||||
/*
 * Flush the TLB if the process should drop kvm->mmu_lock.
 * Return whether the caller still needs to flush the tlb.
 * Yield if the MMU lock is contended or this thread needs to return control
 * to the scheduler.
 *
 * If this function should yield and flush is set, it will perform a remote
 * TLB flush before yielding.
 *
 * If this function yields, it will also reset the tdp_iter's walk over the
 * paging structure and the calling function should skip to the next
 * iteration to allow the iterator to continue its traversal from the
 * paging structure root.
 *
 * Return true if this function yielded and the iterator's traversal was reset.
 * Return false if a yield was not needed.
 */
|
||||
static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
|
||||
static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
|
||||
struct tdp_iter *iter, bool flush)
|
||||
{
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
tdp_iter_refresh_walk(iter);
|
||||
/* Ensure forward progress has been made before yielding. */
|
||||
if (iter->next_last_level_gfn == iter->yielded_gfn)
|
||||
return false;
|
||||
} else {
|
||||
|
||||
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
|
||||
rcu_read_unlock();
|
||||
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
cond_resched_rwlock_write(&kvm->mmu_lock);
|
||||
rcu_read_lock();
|
||||
|
||||
WARN_ON(iter->gfn > iter->next_last_level_gfn);
|
||||
|
||||
tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
|
||||
iter->root_level, iter->min_level,
|
||||
iter->next_last_level_gfn);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static void tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
|
||||
{
|
||||
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
|
||||
cond_resched_lock(&kvm->mmu_lock);
|
||||
tdp_iter_refresh_walk(iter);
|
||||
}
|
||||
return false;
|
||||
}
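
The same yield-when-contended shape appears in the slot and zap walkers converted earlier in this diff. Stripped of the iterator details, and with hypothetical more_work_to_do() and do_one_unit_of_work() helpers, it is roughly:

static void long_mmu_walk_example(struct kvm *kvm)
{
	write_lock(&kvm->mmu_lock);

	while (more_work_to_do(kvm)) {		/* hypothetical predicate */
		do_one_unit_of_work(kvm);	/* hypothetical helper */

		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
			cond_resched_rwlock_write(&kvm->mmu_lock);
	}

	write_unlock(&kvm->mmu_lock);
}
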
/*
|
||||
@ -453,7 +675,15 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
struct tdp_iter iter;
|
||||
bool flush_needed = false;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_pte(iter, root, start, end) {
|
||||
if (can_yield &&
|
||||
tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) {
|
||||
flush_needed = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_shadow_present_pte(iter.old_spte))
|
||||
continue;
|
||||
|
||||
@ -468,12 +698,10 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
continue;
|
||||
|
||||
tdp_mmu_set_spte(kvm, &iter, 0);
|
||||
|
||||
if (can_yield)
|
||||
flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
|
||||
else
|
||||
flush_needed = true;
|
||||
flush_needed = true;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return flush_needed;
|
||||
}
|
||||
|
||||
@ -517,21 +745,18 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
|
||||
int ret = 0;
|
||||
int make_spte_ret = 0;
|
||||
|
||||
if (unlikely(is_noslot_pfn(pfn))) {
|
||||
if (unlikely(is_noslot_pfn(pfn)))
|
||||
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
|
||||
trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
|
||||
} else {
|
||||
else
|
||||
make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
|
||||
pfn, iter->old_spte, prefault, true,
|
||||
map_writable, !shadow_accessed_mask,
|
||||
&new_spte);
|
||||
trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
|
||||
}
|
||||
|
||||
if (new_spte == iter->old_spte)
|
||||
ret = RET_PF_SPURIOUS;
|
||||
else
|
||||
tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);
|
||||
else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte))
|
||||
return RET_PF_RETRY;
|
||||
|
||||
/*
|
||||
* If the page fault was caused by a write but the page is write
|
||||
@ -545,10 +770,16 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
|
||||
}
|
||||
|
||||
/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
|
||||
if (unlikely(is_mmio_spte(new_spte)))
|
||||
if (unlikely(is_mmio_spte(new_spte))) {
|
||||
trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
|
||||
new_spte);
|
||||
ret = RET_PF_EMULATE;
|
||||
} else
|
||||
trace_kvm_mmu_set_spte(iter->level, iter->gfn,
|
||||
rcu_dereference(iter->sptep));
|
||||
|
||||
trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
|
||||
trace_kvm_mmu_set_spte(iter->level, iter->gfn,
|
||||
rcu_dereference(iter->sptep));
|
||||
if (!prefault)
|
||||
vcpu->stat.pf_fixed++;
|
||||
|
||||
@ -586,6 +817,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
huge_page_disallowed, &req_level);
|
||||
|
||||
trace_kvm_mmu_spte_requested(gpa, level, pfn);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
|
||||
if (nx_huge_page_workaround_enabled)
|
||||
disallowed_hugepage_adjust(iter.old_spte, gfn,
|
||||
@ -601,49 +835,61 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
|
||||
*/
|
||||
if (is_shadow_present_pte(iter.old_spte) &&
|
||||
is_large_pte(iter.old_spte)) {
|
||||
tdp_mmu_set_spte(vcpu->kvm, &iter, 0);
|
||||
|
||||
kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
|
||||
KVM_PAGES_PER_HPAGE(iter.level));
|
||||
if (!tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter))
|
||||
break;
|
||||
|
||||
/*
|
||||
* The iter must explicitly re-read the spte here
|
||||
* because the new value informs the !present
|
||||
* path below.
|
||||
*/
|
||||
iter.old_spte = READ_ONCE(*iter.sptep);
|
||||
iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
|
||||
}
|
||||
|
||||
if (!is_shadow_present_pte(iter.old_spte)) {
|
||||
sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
|
||||
list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
|
||||
child_pt = sp->spt;
|
||||
clear_page(child_pt);
|
||||
|
||||
new_spte = make_nonleaf_spte(child_pt,
|
||||
!shadow_accessed_mask);
|
||||
|
||||
trace_kvm_mmu_get_page(sp, true);
|
||||
if (huge_page_disallowed && req_level >= iter.level)
|
||||
account_huge_nx_page(vcpu->kvm, sp);
|
||||
if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter,
|
||||
new_spte)) {
|
||||
tdp_mmu_link_page(vcpu->kvm, sp, true,
|
||||
huge_page_disallowed &&
|
||||
req_level >= iter.level);
|
||||
|
||||
tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
|
||||
trace_kvm_mmu_get_page(sp, true);
|
||||
} else {
|
||||
tdp_mmu_free_sp(sp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (WARN_ON(iter.level != level))
|
||||
if (iter.level != level) {
|
||||
rcu_read_unlock();
|
||||
return RET_PF_RETRY;
|
||||
}
|
||||
|
||||
ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
|
||||
pfn, prefault);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end, unsigned long data,
|
||||
int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
struct kvm_mmu_page *root, gfn_t start,
|
||||
gfn_t end, unsigned long data))
|
||||
static __always_inline int
|
||||
kvm_tdp_mmu_handle_hva_range(struct kvm *kvm,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long data,
|
||||
int (*handler)(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
struct kvm_mmu_page *root,
|
||||
gfn_t start,
|
||||
gfn_t end,
|
||||
unsigned long data))
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
@ -705,6 +951,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
int young = 0;
|
||||
u64 new_spte = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_leaf_pte(iter, root, start, end) {
|
||||
/*
|
||||
* If we have a non-accessed entry we don't need to change the
|
||||
@ -736,6 +984,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
trace_kvm_age_page(iter.gfn, iter.level, slot, young);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return young;
|
||||
}
|
||||
|
||||
@ -781,6 +1031,8 @@ static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
u64 new_spte;
|
||||
int need_flush = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
WARN_ON(pte_huge(*ptep));
|
||||
|
||||
new_pfn = pte_pfn(*ptep);
|
||||
@ -809,6 +1061,8 @@ static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot,
|
||||
if (need_flush)
|
||||
kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -832,21 +1086,27 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
u64 new_spte;
|
||||
bool spte_set = false;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
|
||||
|
||||
for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
|
||||
min_level, start, end) {
|
||||
if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
|
||||
continue;
|
||||
|
||||
if (!is_shadow_present_pte(iter.old_spte) ||
|
||||
!is_last_spte(iter.old_spte, iter.level))
|
||||
!is_last_spte(iter.old_spte, iter.level) ||
|
||||
!(iter.old_spte & PT_WRITABLE_MASK))
|
||||
continue;
|
||||
|
||||
new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
|
||||
|
||||
tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
|
||||
spte_set = true;
|
||||
|
||||
tdp_mmu_iter_cond_resched(kvm, &iter);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return spte_set;
|
||||
}
|
||||
|
||||
@ -888,7 +1148,12 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
u64 new_spte;
|
||||
bool spte_set = false;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_leaf_pte(iter, root, start, end) {
|
||||
if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
|
||||
continue;
|
||||
|
||||
if (spte_ad_need_write_protect(iter.old_spte)) {
|
||||
if (is_writable_pte(iter.old_spte))
|
||||
new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
|
||||
@ -903,9 +1168,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
|
||||
tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
|
||||
spte_set = true;
|
||||
|
||||
tdp_mmu_iter_cond_resched(kvm, &iter);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return spte_set;
|
||||
}
|
||||
|
||||
@ -947,6 +1212,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
struct tdp_iter iter;
|
||||
u64 new_spte;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
|
||||
gfn + BITS_PER_LONG) {
|
||||
if (!mask)
|
||||
@ -956,6 +1223,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
!(mask & (1UL << (iter.gfn - gfn))))
|
||||
continue;
|
||||
|
||||
mask &= ~(1UL << (iter.gfn - gfn));
|
||||
|
||||
if (wrprot || spte_ad_need_write_protect(iter.old_spte)) {
|
||||
if (is_writable_pte(iter.old_spte))
|
||||
new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
|
||||
@ -969,9 +1238,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
}
|
||||
|
||||
tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
|
||||
|
||||
mask &= ~(1UL << (iter.gfn - gfn));
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -989,7 +1258,7 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_mmu_page *root;
|
||||
int root_as_id;
|
||||
|
||||
lockdep_assert_held(&kvm->mmu_lock);
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
for_each_tdp_mmu_root(kvm, root) {
|
||||
root_as_id = kvm_mmu_page_as_id(root);
|
||||
if (root_as_id != slot->as_id)
|
||||
@ -1011,18 +1280,23 @@ static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
u64 new_spte;
|
||||
bool spte_set = false;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_pte(iter, root, start, end) {
|
||||
if (!is_shadow_present_pte(iter.old_spte))
|
||||
if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
|
||||
continue;
|
||||
|
||||
if (!is_shadow_present_pte(iter.old_spte) ||
|
||||
iter.old_spte & shadow_dirty_mask)
|
||||
continue;
|
||||
|
||||
new_spte = iter.old_spte | shadow_dirty_mask;
|
||||
|
||||
tdp_mmu_set_spte(kvm, &iter, new_spte);
|
||||
spte_set = true;
|
||||
|
||||
tdp_mmu_iter_cond_resched(kvm, &iter);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return spte_set;
|
||||
}
|
||||
|
||||
@ -1060,7 +1334,14 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
|
||||
kvm_pfn_t pfn;
|
||||
bool spte_set = false;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_pte(iter, root, start, end) {
|
||||
if (tdp_mmu_iter_cond_resched(kvm, &iter, spte_set)) {
|
||||
spte_set = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_shadow_present_pte(iter.old_spte) ||
|
||||
!is_last_spte(iter.old_spte, iter.level))
|
||||
continue;
|
||||
@ -1072,9 +1353,10 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
|
||||
|
||||
tdp_mmu_set_spte(kvm, &iter, 0);
|
||||
|
||||
spte_set = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
|
||||
spte_set = true;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
if (spte_set)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
@ -1111,6 +1393,8 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
u64 new_spte;
|
||||
bool spte_set = false;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) {
|
||||
if (!is_writable_pte(iter.old_spte))
|
||||
break;
|
||||
@ -1122,6 +1406,8 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
spte_set = true;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return spte_set;
|
||||
}
|
||||
|
||||
@ -1137,7 +1423,7 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
|
||||
int root_as_id;
|
||||
bool spte_set = false;
|
||||
|
||||
lockdep_assert_held(&kvm->mmu_lock);
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
for_each_tdp_mmu_root(kvm, root) {
|
||||
root_as_id = kvm_mmu_page_as_id(root);
|
||||
if (root_as_id != slot->as_id)
|
||||
@ -1162,10 +1448,14 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
|
||||
|
||||
*root_level = vcpu->arch.mmu->shadow_root_level;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
|
||||
leaf = iter.level;
|
||||
sptes[leaf] = iter.old_spte;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return leaf;
|
||||
}
|
||||
|
@ -5,10 +5,6 @@
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
|
||||
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
|
||||
|
||||
bool is_tdp_mmu_root(struct kvm *kvm, hpa_t root);
|
||||
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
|
||||
void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
|
||||
|
||||
@ -47,4 +43,32 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
|
||||
int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
|
||||
int *root_level);
|
||||
|
||||
#ifdef CONFIG_X86_64
void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
#else
static inline void kvm_mmu_init_tdp_mmu(struct kvm *kvm) {}
static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
#endif
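
The point of the stubs is that common MMU code can guard TDP MMU calls unconditionally and have the branch fold away on !CONFIG_X86_64, as in the zap paths converted above. A sketch of that caller pattern, with a made-up function name:

static void zap_everything_example(struct kvm *kvm)
{
	write_lock(&kvm->mmu_lock);

	/* ... zap the shadow MMU pages ... */

	/* Compiles to nothing when the TDP MMU is not built in. */
	if (is_tdp_mmu_enabled(kvm))
		kvm_tdp_mmu_zap_all(kvm);

	write_unlock(&kvm->mmu_lock);
}
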
static inline bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
|
||||
{
|
||||
struct kvm_mmu_page *sp;
|
||||
|
||||
if (!is_tdp_mmu_enabled(kvm))
|
||||
return false;
|
||||
if (WARN_ON(!VALID_PAGE(hpa)))
|
||||
return false;
|
||||
|
||||
sp = to_shadow_page(hpa);
|
||||
if (WARN_ON(!sp))
|
||||
return false;
|
||||
|
||||
return is_tdp_mmu_page(sp) && sp->root_count;
|
||||
}
|
||||
|
||||
#endif /* __KVM_X86_MMU_TDP_MMU_H */
|
||||
|
@ -75,7 +75,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
/* variable MTRRs */
|
||||
WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
|
||||
|
||||
mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
|
||||
mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
|
||||
if ((msr & 1) == 0) {
|
||||
/* MTRR base */
|
||||
if (!valid_mtrr_type(data & 0xff))
|
||||
@ -351,14 +351,14 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
||||
if (var_mtrr_range_is_valid(cur))
|
||||
list_del(&mtrr_state->var_ranges[index].node);
|
||||
|
||||
/* Extend the mask with all 1 bits to the left, since those
|
||||
* bits must implicitly be 0. The bits are then cleared
|
||||
* when reading them.
|
||||
/*
|
||||
* Set all illegal GPA bits in the mask, since those bits must
|
||||
* implicitly be 0. The bits are then cleared when reading them.
|
||||
*/
|
||||
if (!is_mtrr_mask)
|
||||
cur->base = data;
|
||||
else
|
||||
cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu));
|
||||
cur->mask = data | kvm_vcpu_reserved_gpa_bits_raw(vcpu);
|
||||
|
||||
/* add it to the list if it's enabled. */
|
||||
if (var_mtrr_range_is_valid(cur)) {
|
||||
@ -426,7 +426,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
|
||||
else
|
||||
*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
|
||||
|
||||
*pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1;
|
||||
*pdata &= ~kvm_vcpu_reserved_gpa_bits_raw(vcpu);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -373,7 +373,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
|
||||
return 1;
|
||||
|
||||
if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
|
||||
(kvm_x86_ops.get_cpl(vcpu) != 0) &&
|
||||
(static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
|
||||
(kvm_read_cr0(vcpu) & X86_CR0_PE))
|
||||
return 1;
|
||||
|
||||
@ -383,8 +383,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
|
||||
|
||||
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (lapic_in_kernel(vcpu))
|
||||
if (lapic_in_kernel(vcpu)) {
|
||||
if (kvm_x86_ops.pmu_ops->deliver_pmi)
|
||||
kvm_x86_ops.pmu_ops->deliver_pmi(vcpu);
|
||||
kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
|
||||
}
|
||||
}
|
||||
|
||||
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
|
||||
@ -473,6 +476,9 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
|
||||
pmc_stop_counter(pmc);
|
||||
}
|
||||
|
||||
if (kvm_x86_ops.pmu_ops->cleanup)
|
||||
kvm_x86_ops.pmu_ops->cleanup(vcpu);
|
||||
|
||||
bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
|
||||
}
|
||||
|
||||
|
@ -39,6 +39,8 @@ struct kvm_pmu_ops {
|
||||
void (*refresh)(struct kvm_vcpu *vcpu);
|
||||
void (*init)(struct kvm_vcpu *vcpu);
|
||||
void (*reset)(struct kvm_vcpu *vcpu);
|
||||
void (*deliver_pmi)(struct kvm_vcpu *vcpu);
|
||||
void (*cleanup)(struct kvm_vcpu *vcpu);
|
||||
};
|
||||
|
||||
static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
|
||||
|
@ -298,6 +298,23 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
|
||||
u32 icrl, u32 icrh)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int i;
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
bool m = kvm_apic_match_dest(vcpu, source,
|
||||
icrl & APIC_SHORT_MASK,
|
||||
GET_APIC_DEST_FIELD(icrh),
|
||||
icrl & APIC_DEST_MASK);
|
||||
|
||||
if (m && !avic_vcpu_is_running(vcpu))
|
||||
kvm_vcpu_wake_up(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
|
||||
@ -324,28 +341,14 @@ int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
|
||||
kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
|
||||
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
|
||||
break;
|
||||
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
|
||||
int i;
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm *kvm = svm->vcpu.kvm;
|
||||
struct kvm_lapic *apic = svm->vcpu.arch.apic;
|
||||
|
||||
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
|
||||
/*
|
||||
* At this point, we expect that the AVIC HW has already
|
||||
* set the appropriate IRR bits on the valid target
|
||||
* vcpus. So, we just need to kick the appropriate vcpu.
|
||||
*/
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
bool m = kvm_apic_match_dest(vcpu, apic,
|
||||
icrl & APIC_SHORT_MASK,
|
||||
GET_APIC_DEST_FIELD(icrh),
|
||||
icrl & APIC_DEST_MASK);
|
||||
|
||||
if (m && !avic_vcpu_is_running(vcpu))
|
||||
kvm_vcpu_wake_up(vcpu);
|
||||
}
|
||||
avic_kick_target_vcpus(svm->vcpu.kvm, apic, icrl, icrh);
|
||||
break;
|
||||
}
|
||||
case AVIC_IPI_FAILURE_INVALID_TARGET:
|
||||
WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
|
||||
index, svm->vcpu.vcpu_id, icrh, icrl);
|
||||
|
@ -58,7 +58,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
|
||||
u64 pdpte;
|
||||
int ret;
|
||||
|
||||
ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
|
||||
ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
|
||||
offset_in_page(cr3) + index * 8, 8);
|
||||
if (ret)
|
||||
return 0;
|
||||
@ -248,7 +248,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
|
||||
if (vmcb12_lma) {
|
||||
if (!(vmcb12->save.cr4 & X86_CR4_PAE) ||
|
||||
!(vmcb12->save.cr0 & X86_CR0_PE) ||
|
||||
(vmcb12->save.cr3 & vcpu->arch.cr3_lm_rsvd_bits))
|
||||
kvm_vcpu_is_illegal_gpa(vcpu, vmcb12->save.cr3))
|
||||
return false;
|
||||
}
|
||||
if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
|
||||
@ -345,7 +345,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
|
||||
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
|
||||
bool nested_npt)
|
||||
{
|
||||
if (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))
|
||||
if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
|
||||
return -EINVAL;
|
||||
|
||||
if (!nested_npt && is_pae_paging(vcpu) &&
|
||||
@ -392,7 +392,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
|
||||
svm->vmcb->save.rsp = vmcb12->save.rsp;
|
||||
svm->vmcb->save.rip = vmcb12->save.rip;
|
||||
svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
|
||||
svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM;
|
||||
svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
|
||||
svm->vmcb->save.cpl = vmcb12->save.cpl;
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
#include "x86.h"
|
||||
#include "svm.h"
|
||||
#include "svm_ops.h"
|
||||
#include "cpuid.h"
|
||||
#include "trace.h"
|
||||
|
||||
@ -1041,6 +1042,74 @@ e_unpin_memory:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
||||
{
|
||||
void __user *report = (void __user *)(uintptr_t)argp->data;
|
||||
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
||||
struct sev_data_attestation_report *data;
|
||||
struct kvm_sev_attestation_report params;
|
||||
void __user *p;
|
||||
void *blob = NULL;
|
||||
int ret;
|
||||
|
||||
if (!sev_guest(kvm))
|
||||
return -ENOTTY;
|
||||
|
||||
if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
|
||||
return -EFAULT;
|
||||
|
||||
data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
|
||||
/* User wants to query the blob length */
|
||||
if (!params.len)
|
||||
goto cmd;
|
||||
|
||||
p = (void __user *)(uintptr_t)params.uaddr;
|
||||
if (p) {
|
||||
if (params.len > SEV_FW_BLOB_MAX_SIZE) {
|
||||
ret = -EINVAL;
|
||||
goto e_free;
|
||||
}
|
||||
|
||||
ret = -ENOMEM;
|
||||
blob = kmalloc(params.len, GFP_KERNEL);
|
||||
if (!blob)
|
||||
goto e_free;
|
||||
|
||||
data->address = __psp_pa(blob);
|
||||
data->len = params.len;
|
||||
memcpy(data->mnonce, params.mnonce, sizeof(params.mnonce));
|
||||
}
|
||||
cmd:
|
||||
data->handle = sev->handle;
|
||||
ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, data, &argp->error);
|
||||
/*
|
||||
* If we query the session length, FW responded with expected data.
|
||||
*/
|
||||
if (!params.len)
|
||||
goto done;
|
||||
|
||||
if (ret)
|
||||
goto e_free_blob;
|
||||
|
||||
if (blob) {
|
||||
if (copy_to_user(p, blob, params.len))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
done:
|
||||
params.len = data->len;
|
||||
if (copy_to_user(report, &params, sizeof(params)))
|
||||
ret = -EFAULT;
|
||||
e_free_blob:
|
||||
kfree(blob);
|
||||
e_free:
|
||||
kfree(data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
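The handler above follows the usual SEV two-call convention: userspace first calls with params.len == 0 so the firmware reports the required blob size, then calls again with a buffer. A rough userspace sketch of that flow is below; the ioctl and structure names are the standard SEV ones from the uapi headers, but treat the details as an assumption since none of this is part of the hunk.

/* Hypothetical userspace helper -- illustrative only, not part of this diff. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_attestation_report(int vm_fd, const uint8_t nonce[16],
				  void **blob, uint32_t *len)
{
	struct kvm_sev_attestation_report rep = { .len = 0 };
	struct kvm_sev_cmd cmd = {
		.id   = KVM_SEV_GET_ATTESTATION_REPORT,
		.data = (uint64_t)(uintptr_t)&rep,
	};

	memcpy(rep.mnonce, nonce, sizeof(rep.mnonce));

	/*
	 * First call with len == 0: the kernel reports the blob length back
	 * in rep.len (the firmware may flag the call as too short while
	 * still filling in the length), so the return value is ignored here.
	 */
	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);

	*blob = malloc(rep.len);
	rep.uaddr = (uint64_t)(uintptr_t)*blob;

	/* Second call: the PSP-generated report is copied to rep.uaddr. */
	if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd))
		return -1;

	*len = rep.len;
	return 0;
}
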
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
|
||||
{
|
||||
struct kvm_sev_cmd sev_cmd;
|
||||
@ -1091,6 +1160,9 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
|
||||
case KVM_SEV_LAUNCH_SECRET:
|
||||
r = sev_launch_secret(kvm, &sev_cmd);
|
||||
break;
|
||||
case KVM_SEV_GET_ATTESTATION_REPORT:
|
||||
r = sev_get_attestation_report(kvm, &sev_cmd);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
@ -1994,29 +2066,17 @@ void sev_es_create_vcpu(struct vcpu_svm *svm)
|
||||
sev_enc_bit));
|
||||
}
|
||||
|
||||
void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
|
||||
void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
|
||||
{
|
||||
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
|
||||
struct vmcb_save_area *hostsa;
|
||||
unsigned int i;
|
||||
|
||||
/*
|
||||
* As an SEV-ES guest, hardware will restore the host state on VMEXIT,
|
||||
* of which one step is to perform a VMLOAD. Since hardware does not
|
||||
* perform a VMSAVE on VMRUN, the host savearea must be updated.
|
||||
*/
|
||||
asm volatile(__ex("vmsave %0") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
|
||||
|
||||
/*
|
||||
* Certain MSRs are restored on VMEXIT, only save ones that aren't
|
||||
* restored.
|
||||
*/
|
||||
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
|
||||
if (host_save_user_msrs[i].sev_es_restored)
|
||||
continue;
|
||||
|
||||
rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
|
||||
}
|
||||
vmsave(__sme_page_pa(sd->save_area));
|
||||
|
||||
/* XCR0 is restored on VMEXIT, save the current host value */
|
||||
hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
|
||||
@ -2029,22 +2089,6 @@ void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
|
||||
hostsa->xss = host_xss;
|
||||
}
|
||||
|
||||
void sev_es_vcpu_put(struct vcpu_svm *svm)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/*
|
||||
* Certain MSRs are restored on VMEXIT and were saved with vmsave in
|
||||
* sev_es_vcpu_load() above. Only restore ones that weren't.
|
||||
*/
|
||||
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
|
||||
if (host_save_user_msrs[i].sev_es_restored)
|
||||
continue;
|
||||
|
||||
wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "trace.h"
|
||||
|
||||
#include "svm.h"
|
||||
#include "svm_ops.h"
|
||||
|
||||
#define __ex(x) __kvm_handle_fault_on_reboot(x)
|
||||
|
||||
@ -200,9 +201,9 @@ module_param(sev_es, int, 0444);
|
||||
bool __read_mostly dump_invalid_vmcb;
|
||||
module_param(dump_invalid_vmcb, bool, 0644);
|
||||
|
||||
static u8 rsm_ins_bytes[] = "\x0f\xaa";
|
||||
static bool svm_gp_erratum_intercept = true;
|
||||
|
||||
static void svm_complete_interrupts(struct vcpu_svm *svm);
|
||||
static u8 rsm_ins_bytes[] = "\x0f\xaa";
|
||||
|
||||
static unsigned long iopm_base;
|
||||
|
||||
@ -246,21 +247,6 @@ u32 svm_msrpm_offset(u32 msr)
|
||||
|
||||
#define MAX_INST_SIZE 15
|
||||
|
||||
static inline void clgi(void)
|
||||
{
|
||||
asm volatile (__ex("clgi"));
|
||||
}
|
||||
|
||||
static inline void stgi(void)
|
||||
{
|
||||
asm volatile (__ex("stgi"));
|
||||
}
|
||||
|
||||
static inline void invlpga(unsigned long addr, u32 asid)
|
||||
{
|
||||
asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr));
|
||||
}
|
||||
|
||||
static int get_max_npt_level(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
@ -288,6 +274,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
if (!(efer & EFER_SVME)) {
|
||||
svm_leave_nested(svm);
|
||||
svm_set_gif(svm, true);
|
||||
/* #GP intercept is still needed for vmware backdoor */
|
||||
if (!enable_vmware_backdoor)
|
||||
clr_exception_intercept(svm, GP_VECTOR);
|
||||
|
||||
/*
|
||||
* Free the nested guest state, unless we are in SMM.
|
||||
@ -304,6 +293,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
vcpu->arch.efer = old_efer;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (svm_gp_erratum_intercept)
|
||||
set_exception_intercept(svm, GP_VECTOR);
|
||||
}
|
||||
}
|
||||
|
||||
@ -925,6 +917,9 @@ static __init void svm_set_cpu_caps(void)
|
||||
|
||||
if (npt_enabled)
|
||||
kvm_cpu_cap_set(X86_FEATURE_NPT);
|
||||
|
||||
/* Nested VM can receive #VMEXIT instead of triggering #GP */
|
||||
kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
|
||||
}
|
||||
|
||||
/* CPUID 0x80000008 */
|
||||
@ -1032,6 +1027,9 @@ static __init int svm_hardware_setup(void)
|
||||
}
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
|
||||
svm_gp_erratum_intercept = false;
|
||||
|
||||
if (vgif) {
|
||||
if (!boot_cpu_has(X86_FEATURE_VGIF))
|
||||
vgif = false;
|
||||
@ -1207,7 +1205,7 @@ static void init_vmcb(struct vcpu_svm *svm)
|
||||
|
||||
svm_set_efer(&svm->vcpu, 0);
|
||||
save->dr6 = 0xffff0ff0;
|
||||
kvm_set_rflags(&svm->vcpu, 2);
|
||||
kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);
|
||||
save->rip = 0x0000fff0;
|
||||
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
|
||||
|
||||
@ -1366,6 +1364,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
|
||||
svm->vmsa = page_address(vmsa_page);
|
||||
|
||||
svm->asid_generation = 0;
|
||||
svm->guest_state_loaded = false;
|
||||
init_vmcb(svm);
|
||||
|
||||
svm_init_osvw(vcpu);
|
||||
@ -1413,30 +1412,31 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
|
||||
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
|
||||
}
|
||||
|
||||
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
|
||||
int i;
|
||||
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
|
||||
unsigned int i;
|
||||
|
||||
if (unlikely(cpu != vcpu->cpu)) {
|
||||
svm->asid_generation = 0;
|
||||
vmcb_mark_all_dirty(svm->vmcb);
|
||||
}
|
||||
if (svm->guest_state_loaded)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Certain MSRs are restored on VMEXIT (sev-es), or vmload of host save
|
||||
* area (non-sev-es). Save ones that aren't so we can restore them
|
||||
* individually later.
|
||||
*/
|
||||
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
|
||||
rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
|
||||
|
||||
/*
|
||||
* Save additional host state that will be restored on VMEXIT (sev-es)
|
||||
* or subsequent vmload of host save area.
|
||||
*/
|
||||
if (sev_es_guest(svm->vcpu.kvm)) {
|
||||
sev_es_vcpu_load(svm, cpu);
|
||||
sev_es_prepare_guest_switch(svm, vcpu->cpu);
|
||||
} else {
|
||||
#ifdef CONFIG_X86_64
|
||||
rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
|
||||
#endif
|
||||
savesegment(fs, svm->host.fs);
|
||||
savesegment(gs, svm->host.gs);
|
||||
svm->host.ldt = kvm_read_ldt();
|
||||
|
||||
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
|
||||
rdmsrl(host_save_user_msrs[i].index,
|
||||
svm->host_user_msrs[i]);
|
||||
vmsave(__sme_page_pa(sd->save_area));
|
||||
}
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
|
||||
@ -1446,10 +1446,42 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
|
||||
}
|
||||
}
|
||||
|
||||
/* This assumes that the kernel never uses MSR_TSC_AUX */
|
||||
if (static_cpu_has(X86_FEATURE_RDTSCP))
|
||||
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
|
||||
|
||||
svm->guest_state_loaded = true;
|
||||
}
|
||||
|
||||
static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
unsigned int i;
|
||||
|
||||
if (!svm->guest_state_loaded)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Certain MSRs are restored on VMEXIT (sev-es), or vmload of host save
|
||||
* area (non-sev-es). Restore the ones that weren't.
|
||||
*/
|
||||
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
|
||||
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
|
||||
|
||||
svm->guest_state_loaded = false;
|
||||
}
|
||||
|
||||
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
|
||||
|
||||
if (unlikely(cpu != vcpu->cpu)) {
|
||||
svm->asid_generation = 0;
|
||||
vmcb_mark_all_dirty(svm->vmcb);
|
||||
}
|
||||
|
||||
if (sd->current_vmcb != svm->vmcb) {
|
||||
sd->current_vmcb = svm->vmcb;
|
||||
indirect_branch_prediction_barrier();
|
||||
@ -1459,30 +1491,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
|
||||
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
int i;
|
||||
|
||||
avic_vcpu_put(vcpu);
|
||||
svm_prepare_host_switch(vcpu);
|
||||
|
||||
++vcpu->stat.host_state_reload;
|
||||
if (sev_es_guest(svm->vcpu.kvm)) {
|
||||
sev_es_vcpu_put(svm);
|
||||
} else {
|
||||
kvm_load_ldt(svm->host.ldt);
|
||||
#ifdef CONFIG_X86_64
|
||||
loadsegment(fs, svm->host.fs);
|
||||
wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
|
||||
load_gs_index(svm->host.gs);
|
||||
#else
|
||||
#ifdef CONFIG_X86_32_LAZY_GS
|
||||
loadsegment(gs, svm->host.gs);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
|
||||
wrmsrl(host_save_user_msrs[i].index,
|
||||
svm->host_user_msrs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
|
||||
@ -1815,7 +1827,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
|
||||
vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
|
||||
}
|
||||
|
||||
static void update_exception_bitmap(struct kvm_vcpu *vcpu)
|
||||
static void svm_update_exception_bitmap(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
@ -1865,7 +1877,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
|
||||
get_debugreg(vcpu->arch.db[2], 2);
|
||||
get_debugreg(vcpu->arch.db[3], 3);
|
||||
/*
|
||||
* We cannot reset svm->vmcb->save.dr6 to DR6_FIXED_1|DR6_RTM here,
|
||||
* We cannot reset svm->vmcb->save.dr6 to DR6_ACTIVE_LOW here,
|
||||
* because db_interception might need it. We can do it before vmentry.
|
||||
*/
|
||||
vcpu->arch.dr6 = svm->vmcb->save.dr6;
|
||||
@ -1916,7 +1928,7 @@ static int db_interception(struct vcpu_svm *svm)
|
||||
if (!(svm->vcpu.guest_debug &
|
||||
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
|
||||
!svm->nmi_singlestep) {
|
||||
u32 payload = (svm->vmcb->save.dr6 ^ DR6_RTM) & ~DR6_FIXED_1;
|
||||
u32 payload = svm->vmcb->save.dr6 ^ DR6_ACTIVE_LOW;
|
||||
kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload);
|
||||
return 1;
|
||||
}
|
||||
@ -1962,24 +1974,6 @@ static int ac_interception(struct vcpu_svm *svm)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gp_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
u32 error_code = svm->vmcb->control.exit_info_1;
|
||||
|
||||
WARN_ON_ONCE(!enable_vmware_backdoor);
|
||||
|
||||
/*
|
||||
* VMware backdoor emulation on #GP interception only handles IN{S},
|
||||
* OUT{S}, and RDPMC, none of which generate a non-zero error code.
|
||||
*/
|
||||
if (error_code) {
|
||||
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
|
||||
return 1;
|
||||
}
|
||||
return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
|
||||
}
|
||||
|
||||
static bool is_erratum_383(void)
|
||||
{
|
||||
int err, i;
|
||||
@ -2178,6 +2172,102 @@ static int vmrun_interception(struct vcpu_svm *svm)
|
||||
return nested_svm_vmrun(svm);
|
||||
}
|
||||
|
||||
enum {
|
||||
NONE_SVM_INSTR,
|
||||
SVM_INSTR_VMRUN,
|
||||
SVM_INSTR_VMLOAD,
|
||||
SVM_INSTR_VMSAVE,
|
||||
};
|
||||
|
||||
/* Return NONE_SVM_INSTR if not SVM instrs, otherwise return decode result */
|
||||
static int svm_instr_opcode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
|
||||
|
||||
if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
|
||||
return NONE_SVM_INSTR;
|
||||
|
||||
switch (ctxt->modrm) {
|
||||
case 0xd8: /* VMRUN */
|
||||
return SVM_INSTR_VMRUN;
|
||||
case 0xda: /* VMLOAD */
|
||||
return SVM_INSTR_VMLOAD;
|
||||
case 0xdb: /* VMSAVE */
|
||||
return SVM_INSTR_VMSAVE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NONE_SVM_INSTR;
|
||||
}
|
||||
|
||||
static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
|
||||
{
|
||||
const int guest_mode_exit_codes[] = {
|
||||
[SVM_INSTR_VMRUN] = SVM_EXIT_VMRUN,
|
||||
[SVM_INSTR_VMLOAD] = SVM_EXIT_VMLOAD,
|
||||
[SVM_INSTR_VMSAVE] = SVM_EXIT_VMSAVE,
|
||||
};
|
||||
int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
|
||||
[SVM_INSTR_VMRUN] = vmrun_interception,
|
||||
[SVM_INSTR_VMLOAD] = vmload_interception,
|
||||
[SVM_INSTR_VMSAVE] = vmsave_interception,
|
||||
};
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (is_guest_mode(vcpu)) {
|
||||
svm->vmcb->control.exit_code = guest_mode_exit_codes[opcode];
|
||||
svm->vmcb->control.exit_info_1 = 0;
|
||||
svm->vmcb->control.exit_info_2 = 0;
|
||||
|
||||
return nested_svm_vmexit(svm);
|
||||
} else
|
||||
return svm_instr_handlers[opcode](svm);
|
||||
}
|
||||
|
||||
/*
|
||||
* #GP handling code. Note that #GP can be triggered under the following two
|
||||
* cases:
|
||||
* 1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
|
||||
* some AMD CPUs when EAX of these instructions are in the reserved memory
|
||||
* regions (e.g. SMM memory on host).
|
||||
* 2) VMware backdoor
|
||||
*/
|
||||
static int gp_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
u32 error_code = svm->vmcb->control.exit_info_1;
|
||||
int opcode;
|
||||
|
||||
/* Both #GP cases have zero error_code */
|
||||
if (error_code)
|
||||
goto reinject;
|
||||
|
||||
/* Decode the instruction for usage later */
|
||||
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
|
||||
goto reinject;
|
||||
|
||||
opcode = svm_instr_opcode(vcpu);
|
||||
|
||||
if (opcode == NONE_SVM_INSTR) {
|
||||
if (!enable_vmware_backdoor)
|
||||
goto reinject;
|
||||
|
||||
/*
|
||||
* VMware backdoor emulation on #GP interception only handles
|
||||
* IN{S}, OUT{S}, and RDPMC.
|
||||
*/
|
||||
if (!is_guest_mode(vcpu))
|
||||
return kvm_emulate_instruction(vcpu,
|
||||
EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
|
||||
} else
|
||||
return emulate_svm_instr(vcpu, opcode);
|
||||
|
||||
reinject:
|
||||
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void svm_set_gif(struct vcpu_svm *svm, bool value)
|
||||
{
|
||||
if (value) {
|
||||
@ -2265,11 +2355,8 @@ static int xsetbv_interception(struct vcpu_svm *svm)
|
||||
u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
|
||||
u32 index = kvm_rcx_read(&svm->vcpu);
|
||||
|
||||
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
}
|
||||
|
||||
return 1;
|
||||
int err = kvm_set_xcr(&svm->vcpu, index, new_bv);
|
||||
return kvm_complete_insn_gp(&svm->vcpu, err);
|
||||
}
|
||||
|
||||
static int rdpru_interception(struct vcpu_svm *svm)
|
||||
@ -2530,6 +2617,7 @@ static int dr_interception(struct vcpu_svm *svm)
|
||||
{
|
||||
int reg, dr;
|
||||
unsigned long val;
|
||||
int err = 0;
|
||||
|
||||
if (svm->vcpu.guest_debug == 0) {
|
||||
/*
|
||||
@ -2547,20 +2635,16 @@ static int dr_interception(struct vcpu_svm *svm)
|
||||
|
||||
reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
|
||||
dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
|
||||
|
||||
if (dr >= 16) { /* mov to DRn */
|
||||
if (!kvm_require_dr(&svm->vcpu, dr - 16))
|
||||
return 1;
|
||||
if (dr >= 16) { /* mov to DRn */
|
||||
dr -= 16;
|
||||
val = kvm_register_read(&svm->vcpu, reg);
|
||||
kvm_set_dr(&svm->vcpu, dr - 16, val);
|
||||
err = kvm_set_dr(&svm->vcpu, dr, val);
|
||||
} else {
|
||||
if (!kvm_require_dr(&svm->vcpu, dr))
|
||||
return 1;
|
||||
kvm_get_dr(&svm->vcpu, dr, &val);
|
||||
kvm_register_write(&svm->vcpu, reg, val);
|
||||
}
|
||||
|
||||
return kvm_skip_emulated_instruction(&svm->vcpu);
|
||||
return kvm_complete_insn_gp(&svm->vcpu, err);
|
||||
}
|
||||
|
||||
static int cr8_write_interception(struct vcpu_svm *svm)
|
||||
@ -3354,7 +3438,7 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
|
||||
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
|
||||
}
|
||||
|
||||
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
|
||||
static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
@ -3479,7 +3563,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
|
||||
return !svm_interrupt_blocked(vcpu);
|
||||
}
|
||||
|
||||
static void enable_irq_window(struct kvm_vcpu *vcpu)
|
||||
static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
@ -3503,7 +3587,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
|
||||
static void enable_nmi_window(struct kvm_vcpu *vcpu)
|
||||
static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
@ -3560,10 +3644,6 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
|
||||
invlpga(gva, svm->vmcb->control.asid);
|
||||
}
|
||||
|
||||
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
@ -3708,16 +3788,11 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
|
||||
if (sev_es_guest(svm->vcpu.kvm)) {
|
||||
__svm_sev_es_vcpu_run(svm->vmcb_pa);
|
||||
} else {
|
||||
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
|
||||
|
||||
__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
|
||||
#else
|
||||
loadsegment(fs, svm->host.fs);
|
||||
#ifndef CONFIG_X86_32_LAZY_GS
|
||||
loadsegment(gs, svm->host.gs);
|
||||
#endif
|
||||
#endif
|
||||
vmload(__sme_page_pa(sd->save_area));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3783,7 +3858,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
|
||||
svm_set_dr6(svm, vcpu->arch.dr6);
|
||||
else
|
||||
svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM);
|
||||
svm_set_dr6(svm, DR6_ACTIVE_LOW);
|
||||
|
||||
clgi();
|
||||
kvm_load_guest_xsave_state(vcpu);
|
||||
@ -3978,7 +4053,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
if (sev_guest(vcpu->kvm)) {
|
||||
best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0);
|
||||
if (best)
|
||||
vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f));
|
||||
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
|
||||
}
|
||||
|
||||
if (!kvm_vcpu_apicv_active(vcpu))
|
||||
@ -4285,7 +4360,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void enable_smi_window(struct kvm_vcpu *vcpu)
|
||||
static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
@ -4439,7 +4514,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
||||
.vcpu_blocking = svm_vcpu_blocking,
|
||||
.vcpu_unblocking = svm_vcpu_unblocking,
|
||||
|
||||
.update_exception_bitmap = update_exception_bitmap,
|
||||
.update_exception_bitmap = svm_update_exception_bitmap,
|
||||
.get_msr_feature = svm_get_msr_feature,
|
||||
.get_msr = svm_get_msr,
|
||||
.set_msr = svm_set_msr,
|
||||
@ -4482,9 +4557,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
||||
.nmi_allowed = svm_nmi_allowed,
|
||||
.get_nmi_mask = svm_get_nmi_mask,
|
||||
.set_nmi_mask = svm_set_nmi_mask,
|
||||
.enable_nmi_window = enable_nmi_window,
|
||||
.enable_irq_window = enable_irq_window,
|
||||
.update_cr8_intercept = update_cr8_intercept,
|
||||
.enable_nmi_window = svm_enable_nmi_window,
|
||||
.enable_irq_window = svm_enable_irq_window,
|
||||
.update_cr8_intercept = svm_update_cr8_intercept,
|
||||
.set_virtual_apic_mode = svm_set_virtual_apic_mode,
|
||||
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
|
||||
.check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons,
|
||||
@ -4527,7 +4602,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
|
||||
.smi_allowed = svm_smi_allowed,
|
||||
.pre_enter_smm = svm_pre_enter_smm,
|
||||
.pre_leave_smm = svm_pre_leave_smm,
|
||||
.enable_smi_window = enable_smi_window,
|
||||
.enable_smi_window = svm_enable_smi_window,
|
||||
|
||||
.mem_enc_op = svm_mem_enc_op,
|
||||
.mem_enc_reg_region = svm_register_enc_region,
|
||||
|
@ -23,22 +23,8 @@
|
||||
|
||||
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
|
||||
|
||||
static const struct svm_host_save_msrs {
|
||||
u32 index; /* Index of the MSR */
|
||||
bool sev_es_restored; /* True if MSR is restored on SEV-ES VMEXIT */
|
||||
} host_save_user_msrs[] = {
|
||||
#ifdef CONFIG_X86_64
|
||||
{ .index = MSR_STAR, .sev_es_restored = true },
|
||||
{ .index = MSR_LSTAR, .sev_es_restored = true },
|
||||
{ .index = MSR_CSTAR, .sev_es_restored = true },
|
||||
{ .index = MSR_SYSCALL_MASK, .sev_es_restored = true },
|
||||
{ .index = MSR_KERNEL_GS_BASE, .sev_es_restored = true },
|
||||
{ .index = MSR_FS_BASE, .sev_es_restored = true },
|
||||
#endif
|
||||
{ .index = MSR_IA32_SYSENTER_CS, .sev_es_restored = true },
|
||||
{ .index = MSR_IA32_SYSENTER_ESP, .sev_es_restored = true },
|
||||
{ .index = MSR_IA32_SYSENTER_EIP, .sev_es_restored = true },
|
||||
{ .index = MSR_TSC_AUX, .sev_es_restored = false },
|
||||
static const u32 host_save_user_msrs[] = {
|
||||
MSR_TSC_AUX,
|
||||
};
|
||||
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
|
||||
|
||||
@ -130,12 +116,6 @@ struct vcpu_svm {
|
||||
u64 next_rip;
|
||||
|
||||
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
|
||||
struct {
|
||||
u16 fs;
|
||||
u16 gs;
|
||||
u16 ldt;
|
||||
u64 gs_base;
|
||||
} host;
|
||||
|
||||
u64 spec_ctrl;
|
||||
/*
|
||||
@ -192,6 +172,8 @@ struct vcpu_svm {
|
||||
u64 ghcb_sa_len;
|
||||
bool ghcb_sa_sync;
|
||||
bool ghcb_sa_free;
|
||||
|
||||
bool guest_state_loaded;
|
||||
};
|
||||
|
||||
struct svm_cpu_data {
|
||||
@ -587,9 +569,8 @@ int sev_handle_vmgexit(struct vcpu_svm *svm);
|
||||
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
|
||||
void sev_es_init_vmcb(struct vcpu_svm *svm);
|
||||
void sev_es_create_vcpu(struct vcpu_svm *svm);
|
||||
void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
|
||||
void sev_es_vcpu_put(struct vcpu_svm *svm);
|
||||
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
|
||||
void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
|
||||
|
||||
/* vmenter.S */
|
||||
|
||||
|
arch/x86/kvm/svm/svm_ops.h (new file, 69 lines)
@ -0,0 +1,69 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_SVM_OPS_H
#define __KVM_X86_SVM_OPS_H

#include <linux/compiler_types.h>

#include <asm/kvm_host.h>

#define svm_asm(insn, clobber...)				\
do {								\
	asm_volatile_goto("1: " __stringify(insn) "\n\t"	\
			  _ASM_EXTABLE(1b, %l[fault])		\
			  ::: clobber : fault);			\
	return;							\
fault:								\
	kvm_spurious_fault();					\
} while (0)

#define svm_asm1(insn, op1, clobber...)				\
do {								\
	asm_volatile_goto("1: " __stringify(insn) " %0\n\t"	\
			  _ASM_EXTABLE(1b, %l[fault])		\
			  :: op1 : clobber : fault);		\
	return;							\
fault:								\
	kvm_spurious_fault();					\
} while (0)

#define svm_asm2(insn, op1, op2, clobber...)				\
do {									\
	asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t"	\
			  _ASM_EXTABLE(1b, %l[fault])			\
			  :: op1, op2 : clobber : fault);		\
	return;								\
fault:									\
	kvm_spurious_fault();						\
} while (0)

static inline void clgi(void)
{
	svm_asm(clgi);
}

static inline void stgi(void)
{
	svm_asm(stgi);
}

static inline void invlpga(unsigned long addr, u32 asid)
{
	svm_asm2(invlpga, "c"(asid), "a"(addr));
}

/*
 * Despite being a physical address, the portion of rAX that is consumed by
 * VMSAVE, VMLOAD, etc... is still controlled by the effective address size,
 * hence 'unsigned long' instead of 'hpa_t'.
 */
static inline void vmsave(unsigned long pa)
{
	svm_asm1(vmsave, "a" (pa), "memory");
}

static inline void vmload(unsigned long pa)
{
	svm_asm1(vmload, "a" (pa), "memory");
}

#endif /* __KVM_X86_SVM_OPS_H */
|
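The svm_asm*() macros above wrap each SVM instruction in an asm goto with an exception-table entry, so a fault in the instruction branches to the local fault label instead of bringing down the host. As orientation, a hand-expanded view of svm_asm1(vmsave, "a" (pa), "memory") would look roughly like the sketch below; this is an illustration, not literal preprocessor output.

static inline void vmsave(unsigned long pa)
{
	/*
	 * "1:" labels the instruction; _ASM_EXTABLE records that a fault at
	 * label 1 should be fixed up by branching to the C label 'fault'.
	 */
	asm_volatile_goto("1: vmsave %0\n\t"
			  _ASM_EXTABLE(1b, %l[fault])
			  : /* no outputs */ : "a" (pa) : "memory" : fault);
	return;
fault:
	kvm_spurious_fault();
}
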
@ -92,6 +92,42 @@ TRACE_EVENT(kvm_hv_hypercall,
		  __entry->outgpa)
);

/*
 * Tracepoint for Xen hypercall.
 */
TRACE_EVENT(kvm_xen_hypercall,
	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
		 unsigned long a2, unsigned long a3, unsigned long a4,
		 unsigned long a5),
	TP_ARGS(nr, a0, a1, a2, a3, a4, a5),

	TP_STRUCT__entry(
		__field(unsigned long, nr)
		__field(unsigned long, a0)
		__field(unsigned long, a1)
		__field(unsigned long, a2)
		__field(unsigned long, a3)
		__field(unsigned long, a4)
		__field(unsigned long, a5)
	),

	TP_fast_assign(
		__entry->nr = nr;
		__entry->a0 = a0;
		__entry->a1 = a1;
		__entry->a2 = a2;
		__entry->a3 = a3;
		__entry->a4 = a4;
		__entry->a5 = a5;
	),

	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx",
		  __entry->nr, __entry->a0, __entry->a1, __entry->a2,
		  __entry->a3, __entry->a4, __entry->a5)
);

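A TRACE_EVENT(kvm_xen_hypercall, ...) definition like the one above generates a trace_kvm_xen_hypercall() helper. The real caller lives in the Xen hypercall intercept path, which is outside this hunk; a hypothetical call site, shown only to illustrate the generated API, would be roughly:

	/* Hypothetical call site -- illustration only, not part of this diff. */
	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
				params[3], params[4], params[5]);
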
/*
|
||||
* Tracepoint for PIO.
|
||||
*/
|
||||
@ -256,7 +292,7 @@ TRACE_EVENT(name, \
|
||||
__entry->guest_rip = kvm_rip_read(vcpu); \
|
||||
__entry->isa = isa; \
|
||||
__entry->vcpu_id = vcpu->vcpu_id; \
|
||||
kvm_x86_ops.get_exit_info(vcpu, &__entry->info1, \
|
||||
static_call(kvm_x86_get_exit_info)(vcpu, &__entry->info1, \
|
||||
&__entry->info2, \
|
||||
&__entry->intr_info, \
|
||||
&__entry->error_code); \
|
||||
@ -738,7 +774,7 @@ TRACE_EVENT(kvm_emulate_insn,
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->csbase = kvm_x86_ops.get_segment_base(vcpu, VCPU_SREG_CS);
|
||||
__entry->csbase = static_call(kvm_x86_get_segment_base)(vcpu, VCPU_SREG_CS);
|
||||
__entry->len = vcpu->arch.emulate_ctxt->fetch.ptr
|
||||
- vcpu->arch.emulate_ctxt->fetch.data;
|
||||
__entry->rip = vcpu->arch.emulate_ctxt->_eip - __entry->len;
|
||||
|
@ -19,6 +19,9 @@ extern int __read_mostly pt_mode;
|
||||
#define PT_MODE_HOST_GUEST 1
|
||||
|
||||
#define PMU_CAP_FW_WRITES (1ULL << 13)
|
||||
#define PMU_CAP_LBR_FMT 0x3f
|
||||
|
||||
#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
|
||||
|
||||
struct nested_vmx_msrs {
|
||||
/*
|
||||
@ -262,6 +265,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
|
||||
SECONDARY_EXEC_TSC_SCALING;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_bus_lock_detection(void)
|
||||
{
|
||||
return vmcs_config.cpu_based_2nd_exec_ctrl &
|
||||
SECONDARY_EXEC_BUS_LOCK_DETECTION;
|
||||
}
|
||||
|
||||
static inline bool cpu_has_vmx_apicv(void)
|
||||
{
|
||||
return cpu_has_vmx_apic_register_virt() &&
|
||||
@ -371,11 +380,28 @@ static inline bool vmx_pt_mode_is_host_guest(void)
|
||||
|
||||
static inline u64 vmx_get_perf_capabilities(void)
|
||||
{
|
||||
u64 perf_cap = 0;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PDCM))
|
||||
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
|
||||
|
||||
perf_cap &= PMU_CAP_LBR_FMT;
|
||||
|
||||
/*
|
||||
* Since counters are virtualized, KVM would support full
|
||||
* width counting unconditionally, even if the host lacks it.
|
||||
*/
|
||||
return PMU_CAP_FW_WRITES;
|
||||
return PMU_CAP_FW_WRITES | perf_cap;
|
||||
}
|
||||
|
||||
static inline u64 vmx_supported_debugctl(void)
|
||||
{
|
||||
u64 debugctl = 0;
|
||||
|
||||
if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
|
||||
debugctl |= DEBUGCTLMSR_LBR_MASK;
|
||||
|
||||
return debugctl;
|
||||
}
|
||||
|
||||
#endif /* __KVM_X86_VMX_CAPS_H */
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "nested.h"
|
||||
#include "pmu.h"
|
||||
#include "trace.h"
|
||||
#include "vmx.h"
|
||||
#include "x86.h"
|
||||
|
||||
static bool __read_mostly enable_shadow_vmcs = 1;
|
||||
@ -411,8 +412,8 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
|
||||
if (nr == DB_VECTOR) {
|
||||
if (!has_payload) {
|
||||
payload = vcpu->arch.dr6;
|
||||
payload &= ~(DR6_FIXED_1 | DR6_BT);
|
||||
payload ^= DR6_RTM;
|
||||
payload &= ~DR6_BT;
|
||||
payload ^= DR6_ACTIVE_LOW;
|
||||
}
|
||||
*exit_qual = payload;
|
||||
} else
|
||||
@ -744,8 +745,7 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
|
||||
(CC(!nested_cpu_has_vid(vmcs12)) ||
|
||||
CC(!nested_exit_intr_ack_set(vcpu)) ||
|
||||
CC((vmcs12->posted_intr_nv & 0xff00)) ||
|
||||
CC((vmcs12->posted_intr_desc_addr & 0x3f)) ||
|
||||
CC((vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))))
|
||||
CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64))))
|
||||
return -EINVAL;
|
||||
|
||||
/* tpr shadow is needed by all apicv features. */
|
||||
@ -758,13 +758,11 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
|
||||
static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
|
||||
u32 count, u64 addr)
|
||||
{
|
||||
int maxphyaddr;
|
||||
|
||||
if (count == 0)
|
||||
return 0;
|
||||
maxphyaddr = cpuid_maxphyaddr(vcpu);
|
||||
if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
|
||||
(addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
|
||||
|
||||
if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
|
||||
!kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
@ -1062,14 +1060,6 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
|
||||
{
|
||||
unsigned long invalid_mask;
|
||||
|
||||
invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
|
||||
return (val & invalid_mask) == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit.
|
||||
* tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't
|
||||
@ -1121,7 +1111,7 @@ static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
|
||||
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
|
||||
enum vm_entry_failure_code *entry_failure_code)
|
||||
{
|
||||
if (CC(!nested_cr3_valid(vcpu, cr3))) {
|
||||
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
|
||||
*entry_failure_code = ENTRY_FAIL_DEFAULT;
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2532,7 +2522,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
||||
* bitwise-or of what L1 wants to trap for L2, and what we want to
|
||||
* trap. Note that CR0.TS also needs updating - we do this later.
|
||||
*/
|
||||
update_exception_bitmap(vcpu);
|
||||
vmx_update_exception_bitmap(vcpu);
|
||||
vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
|
||||
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
|
||||
|
||||
@ -2635,7 +2625,6 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
|
||||
static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
int maxphyaddr = cpuid_maxphyaddr(vcpu);
|
||||
|
||||
/* Check for memory type validity */
|
||||
switch (new_eptp & VMX_EPTP_MT_MASK) {
|
||||
@ -2666,7 +2655,7 @@ static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
|
||||
}
|
||||
|
||||
/* Reserved bits should not be set */
|
||||
if (CC(new_eptp >> maxphyaddr || ((new_eptp >> 7) & 0x1f)))
|
||||
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
|
||||
return false;
|
||||
|
||||
/* AD, if set, should be supported */
|
||||
@ -2850,7 +2839,7 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
|
||||
|
||||
if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
|
||||
CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
|
||||
CC(!nested_cr3_valid(vcpu, vmcs12->host_cr3)))
|
||||
CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
|
||||
return -EINVAL;
|
||||
|
||||
if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
|
||||
@ -3057,35 +3046,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
|
||||
vmx->loaded_vmcs->host_state.cr4 = cr4;
|
||||
}
|
||||
|
||||
asm(
|
||||
"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
|
||||
"cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
|
||||
"je 1f \n\t"
|
||||
__ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
|
||||
"mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
|
||||
"1: \n\t"
|
||||
"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
|
||||
|
||||
/* Check if vmlaunch or vmresume is needed */
|
||||
"cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
|
||||
|
||||
/*
|
||||
* VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
|
||||
* RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
|
||||
* Valid. vmx_vmenter() directly "returns" RFLAGS, and so the
|
||||
* results of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
|
||||
*/
|
||||
"call vmx_vmenter\n\t"
|
||||
|
||||
CC_SET(be)
|
||||
: ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
|
||||
: [HOST_RSP]"r"((unsigned long)HOST_RSP),
|
||||
[loaded_vmcs]"r"(vmx->loaded_vmcs),
|
||||
[launched]"i"(offsetof(struct loaded_vmcs, launched)),
|
||||
[host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
|
||||
[wordsize]"i"(sizeof(ulong))
|
||||
: "memory"
|
||||
);
|
||||
vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
|
||||
vmx->loaded_vmcs->launched);
|
||||
|
||||
if (vmx->msr_autoload.host.nr)
|
||||
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
|
||||
@ -3330,7 +3292,11 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
enum vm_entry_failure_code entry_failure_code;
|
||||
bool evaluate_pending_interrupts;
|
||||
u32 exit_reason, failed_index;
|
||||
union vmx_exit_reason exit_reason = {
|
||||
.basic = EXIT_REASON_INVALID_STATE,
|
||||
.failed_vmentry = 1,
|
||||
};
|
||||
u32 failed_index;
|
||||
|
||||
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
|
||||
kvm_vcpu_flush_tlb_current(vcpu);
|
||||
@ -3382,7 +3348,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
|
||||
|
||||
if (nested_vmx_check_guest_state(vcpu, vmcs12,
|
||||
&entry_failure_code)) {
|
||||
exit_reason = EXIT_REASON_INVALID_STATE;
|
||||
exit_reason.basic = EXIT_REASON_INVALID_STATE;
|
||||
vmcs12->exit_qualification = entry_failure_code;
|
||||
goto vmentry_fail_vmexit;
|
||||
}
|
||||
@ -3393,7 +3359,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
|
||||
vcpu->arch.tsc_offset += vmcs12->tsc_offset;
|
||||
|
||||
if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
|
||||
exit_reason = EXIT_REASON_INVALID_STATE;
|
||||
exit_reason.basic = EXIT_REASON_INVALID_STATE;
|
||||
vmcs12->exit_qualification = entry_failure_code;
|
||||
goto vmentry_fail_vmexit_guest_mode;
|
||||
}
|
||||
@ -3403,7 +3369,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
|
||||
vmcs12->vm_entry_msr_load_addr,
|
||||
vmcs12->vm_entry_msr_load_count);
|
||||
if (failed_index) {
|
||||
exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
|
||||
exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL;
|
||||
vmcs12->exit_qualification = failed_index;
|
||||
goto vmentry_fail_vmexit_guest_mode;
|
||||
}
|
||||
@ -3471,7 +3437,7 @@ vmentry_fail_vmexit:
|
||||
return NVMX_VMENTRY_VMEXIT;
|
||||
|
||||
load_vmcs12_host_state(vcpu, vmcs12);
|
||||
vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
|
||||
vmcs12->vm_exit_reason = exit_reason.full;
|
||||
if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
|
||||
vmx->nested.need_vmcs12_to_shadow_sync = true;
|
||||
return NVMX_VMENTRY_VMEXIT;
|
||||
@ -5559,7 +5525,12 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
|
||||
fail:
|
||||
nested_vmx_vmexit(vcpu, vmx->exit_reason,
|
||||
/*
|
||||
* This is effectively a reflected VM-Exit, as opposed to a synthesized
|
||||
* nested VM-Exit. Pass the original exit reason, i.e. don't hardcode
|
||||
* EXIT_REASON_VMFUNC as the exit reason.
|
||||
*/
|
||||
nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
|
||||
vmx_get_intr_info(vcpu),
|
||||
vmx_get_exit_qual(vcpu));
|
||||
return 1;
|
||||
@ -5627,7 +5598,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
|
||||
* MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
|
||||
*/
|
||||
static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
|
||||
struct vmcs12 *vmcs12, u32 exit_reason)
|
||||
struct vmcs12 *vmcs12,
|
||||
union vmx_exit_reason exit_reason)
|
||||
{
|
||||
u32 msr_index = kvm_rcx_read(vcpu);
|
||||
gpa_t bitmap;
|
||||
@ -5641,7 +5613,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
|
||||
* First we need to figure out which of the four to use:
|
||||
*/
|
||||
bitmap = vmcs12->msr_bitmap;
|
||||
if (exit_reason == EXIT_REASON_MSR_WRITE)
|
||||
if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
|
||||
bitmap += 2048;
|
||||
if (msr_index >= 0xc0000000) {
|
||||
msr_index -= 0xc0000000;
|
||||
@ -5778,11 +5750,12 @@ static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
|
||||
* Return true if L0 wants to handle an exit from L2 regardless of whether or not
|
||||
* L1 wants the exit. Only call this when in is_guest_mode (L2).
|
||||
*/
|
||||
static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
|
||||
static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
|
||||
union vmx_exit_reason exit_reason)
|
||||
{
|
||||
u32 intr_info;
|
||||
|
||||
switch ((u16)exit_reason) {
|
||||
switch ((u16)exit_reason.basic) {
|
||||
case EXIT_REASON_EXCEPTION_NMI:
|
||||
intr_info = vmx_get_intr_info(vcpu);
|
||||
if (is_nmi(intr_info))
|
||||
@ -5838,12 +5811,13 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
|
||||
* Return 1 if L1 wants to intercept an exit from L2. Only call this when in
|
||||
* is_guest_mode (L2).
|
||||
*/
|
||||
static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
|
||||
static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
|
||||
union vmx_exit_reason exit_reason)
|
||||
{
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
u32 intr_info;
|
||||
|
||||
switch ((u16)exit_reason) {
|
||||
switch ((u16)exit_reason.basic) {
|
||||
case EXIT_REASON_EXCEPTION_NMI:
|
||||
intr_info = vmx_get_intr_info(vcpu);
|
||||
if (is_nmi(intr_info))
|
||||
@ -5962,7 +5936,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
|
||||
bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
u32 exit_reason = vmx->exit_reason;
|
||||
union vmx_exit_reason exit_reason = vmx->exit_reason;
|
||||
unsigned long exit_qual;
|
||||
u32 exit_intr_info;
|
||||
|
||||
@ -5981,7 +5955,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
|
||||
goto reflect_vmexit;
|
||||
}
|
||||
|
||||
trace_kvm_nested_vmexit(exit_reason, vcpu, KVM_ISA_VMX);
|
||||
trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX);
|
||||
|
||||
/* If L0 (KVM) wants the exit, it trumps L1's desires. */
|
||||
if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
|
||||
@ -6007,7 +5981,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
|
||||
exit_qual = vmx_get_exit_qual(vcpu);
|
||||
|
||||
reflect_vmexit:
|
||||
nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual);
|
||||
nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -152,12 +152,17 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
|
||||
return &counters[array_index_nospec(idx, num_counters)];
|
||||
}
|
||||
|
||||
static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
|
||||
static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
return vcpu->arch.perf_capabilities & PMU_CAP_FW_WRITES;
|
||||
return vcpu->arch.perf_capabilities;
|
||||
}
|
||||
|
||||
static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
|
||||
}
|
||||
|
||||
static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
|
||||
@ -168,6 +173,41 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
|
||||
return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
|
||||
}
|
||||
|
||||
bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* As a first step, a guest could only enable LBR feature if its
|
||||
* cpu model is the same as the host because the LBR registers
|
||||
* would be pass-through to the guest and they're model specific.
|
||||
*/
|
||||
return boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
|
||||
}
|
||||
|
||||
bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
|
||||
|
||||
return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT);
|
||||
}
|
||||
|
||||
static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
|
||||
{
|
||||
struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
|
||||
bool ret = false;
|
||||
|
||||
if (!intel_pmu_lbr_is_enabled(vcpu))
|
||||
return ret;
|
||||
|
||||
ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS) ||
|
||||
(index >= records->from && index < records->from + records->nr) ||
|
||||
(index >= records->to && index < records->to + records->nr);
|
||||
|
||||
if (!ret && records->info)
|
||||
ret = (index >= records->info && index < records->info + records->nr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
|
||||
{
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
@ -183,7 +223,8 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
|
||||
default:
|
||||
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
|
||||
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
|
||||
get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr);
|
||||
get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr) ||
|
||||
intel_pmu_is_valid_lbr_msr(vcpu, msr);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -202,6 +243,111 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
|
||||
return pmc;
|
||||
}
|
||||
|
||||
static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
|
||||
|
||||
if (lbr_desc->event) {
|
||||
perf_event_release_kernel(lbr_desc->event);
|
||||
lbr_desc->event = NULL;
|
||||
vcpu_to_pmu(vcpu)->event_count--;
|
||||
}
|
||||
}
|
||||
|
||||
int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
|
||||
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
|
||||
struct perf_event *event;
|
||||
|
||||
/*
|
||||
* The perf_event_attr is constructed in the minimum efficient way:
|
||||
* - set 'pinned = true' to make it task pinned so that if another
|
||||
* cpu pinned event reclaims LBR, the event->oncpu will be set to -1;
|
||||
* - set '.exclude_host = true' to record guest branches behavior;
|
||||
*
|
||||
* - set '.config = INTEL_FIXED_VLBR_EVENT' to indicates host perf
|
||||
* schedule the event without a real HW counter but a fake one;
|
||||
* check is_guest_lbr_event() and __intel_get_event_constraints();
|
||||
*
|
||||
* - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
|
||||
* 'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
|
||||
* PERF_SAMPLE_BRANCH_USER' to configure it as a LBR callstack
|
||||
* event, which helps KVM to save/restore guest LBR records
|
||||
* during host context switches and reduces quite a lot overhead,
|
||||
* check branch_user_callstack() and intel_pmu_lbr_sched_task();
|
||||
*/
|
||||
struct perf_event_attr attr = {
|
||||
.type = PERF_TYPE_RAW,
|
||||
.size = sizeof(attr),
|
||||
.config = INTEL_FIXED_VLBR_EVENT,
|
||||
.sample_type = PERF_SAMPLE_BRANCH_STACK,
|
||||
.pinned = true,
|
||||
.exclude_host = true,
|
||||
.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
|
||||
PERF_SAMPLE_BRANCH_USER,
|
||||
};
|
||||
|
||||
if (unlikely(lbr_desc->event)) {
|
||||
__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
|
||||
return 0;
|
||||
}
|
||||
|
||||
event = perf_event_create_kernel_counter(&attr, -1,
|
||||
current, NULL, NULL);
|
||||
if (IS_ERR(event)) {
|
||||
pr_debug_ratelimited("%s: failed %ld\n",
|
||||
__func__, PTR_ERR(event));
|
||||
return -ENOENT;
|
||||
}
|
||||
lbr_desc->event = event;
|
||||
pmu->event_count++;
|
||||
__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* It's safe to access LBR msrs from guest when they have not
|
||||
* been passthrough since the host would help restore or reset
|
||||
* the LBR msrs records when the guest LBR event is scheduled in.
|
||||
*/
|
||||
static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
|
||||
struct msr_data *msr_info, bool read)
|
||||
{
|
||||
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
|
||||
u32 index = msr_info->index;
|
||||
|
||||
if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
|
||||
return false;
|
||||
|
||||
if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
|
||||
goto dummy;
|
||||
|
||||
/*
|
||||
* Disable irq to ensure the LBR feature doesn't get reclaimed by the
|
||||
* host at the time the value is read from the msr, and this avoids the
|
||||
* host LBR value to be leaked to the guest. If LBR has been reclaimed,
|
||||
* return 0 on guest reads.
|
||||
*/
|
||||
local_irq_disable();
|
||||
if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
|
||||
if (read)
|
||||
rdmsrl(index, msr_info->data);
|
||||
else
|
||||
wrmsrl(index, msr_info->data);
|
||||
__set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
|
||||
local_irq_enable();
|
||||
return true;
|
||||
}
|
||||
clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
|
||||
local_irq_enable();
|
||||
|
||||
dummy:
|
||||
if (read)
|
||||
msr_info->data = 0;
|
||||
return true;
|
||||
}

static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -236,7 +382,8 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
			msr_info->data = pmc->eventsel;
			return 0;
		}
		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
			return 0;
	}

	return 1;
@@ -307,7 +454,8 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
				reprogram_gp_counter(pmc, data);
				return 0;
			}
		}
		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
			return 0;
	}

	return 1;
@@ -316,6 +464,8 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	struct x86_pmu_capability x86_pmu;
	struct kvm_cpuid_entry2 *entry;
	union cpuid10_eax eax;
@@ -327,7 +477,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
	pmu->version = 0;
	pmu->reserved_bits = 0xffffffff00200000ull;
	vcpu->arch.perf_capabilities = 0;

	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
	if (!entry)
@@ -340,8 +489,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
		return;

	perf_get_x86_pmu_capability(&x86_pmu);
	if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
		vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();

	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
					 x86_pmu.num_counters_gp);
@@ -385,12 +532,21 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
		   INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

	nested_vmx_pmu_entry_exit_ctls_update(vcpu);

	if (intel_pmu_lbr_is_compatible(vcpu))
		x86_perf_get_lbr(&lbr_desc->records);
	else
		lbr_desc->records.nr = 0;

	if (lbr_desc->records.nr)
		bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
}

static void intel_pmu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
		pmu->gp_counters[i].type = KVM_PMC_GP;
@@ -405,6 +561,11 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
		pmu->fixed_counters[i].current_config = 0;
	}

	vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();
	lbr_desc->records.nr = 0;
	lbr_desc->event = NULL;
	lbr_desc->msr_passthrough = false;
}

static void intel_pmu_reset(struct kvm_vcpu *vcpu)
@@ -429,6 +590,119 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)

	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
		pmu->global_ovf_ctrl = 0;

	intel_pmu_release_guest_lbr_event(vcpu);
}

/*
 * Emulate LBR_On_PMI behavior for 1 < pmu.version < 4.
 *
 * If Freeze_LBR_On_PMI = 1, the LBR is frozen on PMI and
 * the KVM emulates to clear the LBR bit (bit 0) in IA32_DEBUGCTL.
 *
 * Guest needs to re-enable LBR to resume branches recording.
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);

	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
		data &= ~DEBUGCTLMSR_LBR;
		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
	}
}

static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	u8 version = vcpu_to_pmu(vcpu)->version;

	if (!intel_pmu_lbr_is_enabled(vcpu))
		return;

	if (version > 1 && version < 4)
		intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
}
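
From the guest's point of view, the emulation above means a PMI handler running on PMU versions 2 and 3 observes DEBUGCTL.LBR (bit 0) cleared and must set it again before branch recording resumes. An illustrative guest-side fragment (not part of this patch) showing that contract:

static void guest_pmi_reenable_lbr(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	/* KVM cleared the LBR bit to emulate freeze-on-PMI; turn it back on. */
	if (!(debugctl & DEBUGCTLMSR_LBR))
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | DEBUGCTLMSR_LBR);
}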

static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
{
	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
	int i;

	for (i = 0; i < lbr->nr; i++) {
		vmx_set_intercept_for_msr(vcpu, lbr->from + i, MSR_TYPE_RW, set);
		vmx_set_intercept_for_msr(vcpu, lbr->to + i, MSR_TYPE_RW, set);
		if (lbr->info)
			vmx_set_intercept_for_msr(vcpu, lbr->info + i, MSR_TYPE_RW, set);
	}

	vmx_set_intercept_for_msr(vcpu, MSR_LBR_SELECT, MSR_TYPE_RW, set);
	vmx_set_intercept_for_msr(vcpu, MSR_LBR_TOS, MSR_TYPE_RW, set);
}

static inline void vmx_disable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (!lbr_desc->msr_passthrough)
		return;

	vmx_update_intercept_for_lbr_msrs(vcpu, true);
	lbr_desc->msr_passthrough = false;
}

static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (lbr_desc->msr_passthrough)
		return;

	vmx_update_intercept_for_lbr_msrs(vcpu, false);
	lbr_desc->msr_passthrough = true;
}
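
Note the polarity of the boolean these two wrappers pass down: the last argument of vmx_set_intercept_for_msr() means "set the intercept", so disabling passthrough passes true and enabling it passes false. A hypothetical wrapper (not in the patch) that states the intent directly:

static inline void lbr_msrs_set_passthrough(struct kvm_vcpu *vcpu, bool passthrough)
{
	/* passthrough == true means "do not intercept the LBR MSRs". */
	vmx_update_intercept_for_lbr_msrs(vcpu, !passthrough);
}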

/*
 * Higher priority host perf events (e.g. cpu pinned) could reclaim the
 * pmu resources (e.g. LBR) that were assigned to the guest. This is
 * usually done via ipi calls (more details in perf_install_in_context).
 *
 * Before entering the non-root mode (with irq disabled here), double
 * confirm that the pmu features enabled to the guest are not reclaimed
 * by higher priority host events. Otherwise, disallow vcpu's access to
 * the reclaimed features.
 */
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (!lbr_desc->event) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
		if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
			goto warn;
		if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
			goto warn;
		return;
	}

	if (lbr_desc->event->state < PERF_EVENT_STATE_ACTIVE) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
		__clear_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
		goto warn;
	} else
		vmx_enable_lbr_msrs_passthrough(vcpu);

	return;

warn:
	pr_warn_ratelimited("kvm: vcpu-%d: fail to passthrough LBR.\n",
		vcpu->vcpu_id);
}
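
As the comment above says, this re-check runs with interrupts already disabled, immediately before VM-entry. A simplified sketch of that call pattern (the surrounding function is a stand-in; the exact upstream call site is assumed, not shown here):

static void vmx_enter_guest_sketch(struct kvm_vcpu *vcpu)
{
	local_irq_disable();
	/* Re-check LBR ownership now that the reclaim IPI can no longer run. */
	vmx_passthrough_lbr_msrs(vcpu);
	/* ... VMLAUNCH/VMRESUME and the rest of the entry sequence ... */
	local_irq_enable();
}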

static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
		intel_pmu_release_guest_lbr_event(vcpu);
}

struct kvm_pmu_ops intel_pmu_ops = {
@@ -445,4 +719,6 @@ struct kvm_pmu_ops intel_pmu_ops = {
	.refresh = intel_pmu_refresh,
	.init = intel_pmu_init,
	.reset = intel_pmu_reset,
	.deliver_pmi = intel_pmu_deliver_pmi,
	.cleanup = intel_pmu_cleanup,
};

@@ -54,7 +54,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)

		dest = cpu_physical_id(cpu);

-		if (x2apic_enabled())
+		if (x2apic_mode)
			new.ndst = dest;
		else
			new.ndst = (dest << 8) & 0xFF00;
@@ -104,7 +104,7 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)

		dest = cpu_physical_id(vcpu->cpu);

-		if (x2apic_enabled())
+		if (x2apic_mode)
			new.ndst = dest;
		else
			new.ndst = (dest << 8) & 0xFF00;
@@ -174,7 +174,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
		 */
		dest = cpu_physical_id(vcpu->pre_pcpu);

-		if (x2apic_enabled())
+		if (x2apic_mode)
			new.ndst = dest;
		else
			new.ndst = (dest << 8) & 0xFF00;
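
The repeated pattern in these hunks encodes the posted-interrupt descriptor's destination field: in x2APIC mode NDST carries the full 32-bit APIC ID, while in xAPIC mode the 8-bit ID must sit in bits 15:8. The switch from x2apic_enabled() to the cached x2apic_mode flag appears intended to avoid re-reading the APIC base MSR on this hot path. A hypothetical helper (not in the patch) capturing the encoding:

static inline u32 pi_encode_ndst(u32 apic_id, bool x2apic)
{
	/* x2APIC: full 32-bit ID; xAPIC: 8-bit ID placed in bits 15:8 of NDST. */
	return x2apic ? apic_id : (apic_id << 8) & 0xFF00;
}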

@@ -44,7 +44,7 @@
 * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
 * to vmx_vmexit.
 */
-SYM_FUNC_START(vmx_vmenter)
+SYM_FUNC_START_LOCAL(vmx_vmenter)
	/* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
	je 2f
