mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
* ARM: selftests improvements, large PUD support for HugeTLB,
single-stepping fixes, improved tracing, various timer and vGIC fixes * x86: Processor Tracing virtualization, STIBP support, some correctness fixes, refactorings and splitting of vmx.c, use the Hyper-V range TLB flush hypercall, reduce order of vcpu struct, WBNOINVD support, do not use -ftrace for __noclone functions, nested guest support for PAUSE filtering on AMD, more Hyper-V enlightenments (direct mode for synthetic timers) * PPC: nested VFIO * s390: bugfixes only this time -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQEcBAABAgAGBQJcH0vFAAoJEL/70l94x66Dw/wH/2FZp1YOM5OgiJzgqnXyDbyf dNEfWo472MtNiLsuf+ZAfJojVIu9cv7wtBfXNzW+75XZDfh/J88geHWNSiZDm3Fe aM4MOnGG0yF3hQrRQyEHe4IFhGFNERax8Ccv+OL44md9CjYrIrsGkRD08qwb+gNh P8T/3wJEKwUcVHA/1VHEIM8MlirxNENc78p6JKd/C7zb0emjGavdIpWFUMr3SNfs CemabhJUuwOYtwjRInyx1y34FzYwW3Ejuc9a9UoZ+COahUfkuxHE8u+EQS7vLVF6 2VGVu5SA0PqgmLlGhHthxLqVgQYo+dB22cRnsLtXlUChtVAq8q9uu5sKzvqEzuE= =b4Jx -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM updates from Paolo Bonzini: "ARM: - selftests improvements - large PUD support for HugeTLB - single-stepping fixes - improved tracing - various timer and vGIC fixes x86: - Processor Tracing virtualization - STIBP support - some correctness fixes - refactorings and splitting of vmx.c - use the Hyper-V range TLB flush hypercall - reduce order of vcpu struct - WBNOINVD support - do not use -ftrace for __noclone functions - nested guest support for PAUSE filtering on AMD - more Hyper-V enlightenments (direct mode for synthetic timers) PPC: - nested VFIO s390: - bugfixes only this time" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (171 commits) KVM: x86: Add CPUID support for new instruction WBNOINVD kvm: selftests: ucall: fix exit mmio address guessing Revert "compiler-gcc: disable -ftracer for __noclone functions" KVM: VMX: Move VM-Enter + VM-Exit handling to non-inline sub-routines KVM: VMX: Explicitly reference RCX as the vmx_vcpu pointer in asm blobs KVM: x86: Use jmp to invoke kvm_spurious_fault() from .fixup MAINTAINERS: Add arch/x86/kvm sub-directories to existing KVM/x86 entry KVM/x86: Use SVM assembly instruction mnemonics instead of .byte streams KVM/MMU: Flush tlb directly in the kvm_zap_gfn_range() KVM/MMU: Flush tlb directly in kvm_set_pte_rmapp() KVM/MMU: Move tlb flush in kvm_set_pte_rmapp() to kvm_mmu_notifier_change_pte() KVM: Make kvm_set_spte_hva() return int KVM: Replace old tlb flush function with new one to flush a specified range. KVM/MMU: Add tlb flush with range helper function KVM/VMX: Add hv tlb range flush support x86/hyper-v: Add HvFlushGuestAddressList hypercall support KVM: Add tlb_remote_flush_with_range callback in kvm_x86_ops KVM: x86: Disable Intel PT when VMXON in L1 guest KVM: x86: Set intercept for Intel PT MSRs read/write KVM: x86: Implement Intel PT MSRs read/write emulation ...
This commit is contained in:
commit
42b00f122c
@ -305,6 +305,9 @@ the address space for which you want to return the dirty bitmap.
|
||||
They must be less than the value that KVM_CHECK_EXTENSION returns for
|
||||
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
|
||||
|
||||
The bits in the dirty bitmap are cleared before the ioctl returns, unless
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information,
|
||||
see the description of the capability.
|
||||
|
||||
4.9 KVM_SET_MEMORY_ALIAS
|
||||
|
||||
@ -1129,10 +1132,15 @@ documentation when it pops into existence).
|
||||
|
||||
4.37 KVM_ENABLE_CAP
|
||||
|
||||
Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
|
||||
Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
|
||||
mips (only KVM_CAP_ENABLE_CAP), ppc, s390
|
||||
Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
|
||||
Capability: KVM_CAP_ENABLE_CAP
|
||||
Architectures: mips, ppc, s390
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_enable_cap (in)
|
||||
Returns: 0 on success; -1 on error
|
||||
|
||||
Capability: KVM_CAP_ENABLE_CAP_VM
|
||||
Architectures: all
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_enable_cap (in)
|
||||
Returns: 0 on success; -1 on error
|
||||
|
||||
@ -3753,6 +3761,102 @@ Coalesced pio is based on coalesced mmio. There is little difference
|
||||
between coalesced mmio and pio except that coalesced pio records accesses
|
||||
to I/O ports.
|
||||
|
||||
4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
|
||||
|
||||
Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
|
||||
Architectures: x86
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_dirty_log (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
/* for KVM_CLEAR_DIRTY_LOG */
|
||||
struct kvm_clear_dirty_log {
|
||||
__u32 slot;
|
||||
__u32 num_pages;
|
||||
__u64 first_page;
|
||||
union {
|
||||
void __user *dirty_bitmap; /* one bit per page */
|
||||
__u64 padding;
|
||||
};
|
||||
};
|
||||
|
||||
The ioctl clears the dirty status of pages in a memory slot, according to
|
||||
the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
|
||||
field. Bit 0 of the bitmap corresponds to page "first_page" in the
|
||||
memory slot, and num_pages is the size in bits of the input bitmap.
|
||||
Both first_page and num_pages must be a multiple of 64. For each bit
|
||||
that is set in the input bitmap, the corresponding page is marked "clean"
|
||||
in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
|
||||
(for example via write-protection, or by clearing the dirty bit in
|
||||
a page table entry).
|
||||
|
||||
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
|
||||
the address space for which you want to return the dirty bitmap.
|
||||
They must be less than the value that KVM_CHECK_EXTENSION returns for
|
||||
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
|
||||
|
||||
This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
|
||||
is enabled; for more information, see the description of the capability.
|
||||
However, it can always be used as long as KVM_CHECK_EXTENSION confirms
|
||||
that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present.
|
||||
|
||||
4.118 KVM_GET_SUPPORTED_HV_CPUID
|
||||
|
||||
Capability: KVM_CAP_HYPERV_CPUID
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_cpuid2 (in/out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
struct kvm_cpuid2 {
|
||||
__u32 nent;
|
||||
__u32 padding;
|
||||
struct kvm_cpuid_entry2 entries[0];
|
||||
};
|
||||
|
||||
struct kvm_cpuid_entry2 {
|
||||
__u32 function;
|
||||
__u32 index;
|
||||
__u32 flags;
|
||||
__u32 eax;
|
||||
__u32 ebx;
|
||||
__u32 ecx;
|
||||
__u32 edx;
|
||||
__u32 padding[3];
|
||||
};
|
||||
|
||||
This ioctl returns x86 cpuid features leaves related to Hyper-V emulation in
|
||||
KVM. Userspace can use the information returned by this ioctl to construct
|
||||
cpuid information presented to guests consuming Hyper-V enlightenments (e.g.
|
||||
Windows or Hyper-V guests).
|
||||
|
||||
CPUID feature leaves returned by this ioctl are defined by Hyper-V Top Level
|
||||
Functional Specification (TLFS). These leaves can't be obtained with
|
||||
KVM_GET_SUPPORTED_CPUID ioctl because some of them intersect with KVM feature
|
||||
leaves (0x40000000, 0x40000001).
|
||||
|
||||
Currently, the following list of CPUID leaves are returned:
|
||||
HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS
|
||||
HYPERV_CPUID_INTERFACE
|
||||
HYPERV_CPUID_VERSION
|
||||
HYPERV_CPUID_FEATURES
|
||||
HYPERV_CPUID_ENLIGHTMENT_INFO
|
||||
HYPERV_CPUID_IMPLEMENT_LIMITS
|
||||
HYPERV_CPUID_NESTED_FEATURES
|
||||
|
||||
HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
|
||||
enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
|
||||
|
||||
Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure
|
||||
with the 'nent' field indicating the number of entries in the variable-size
|
||||
array 'entries'. If the number of entries is too low to describe all Hyper-V
|
||||
feature leaves, an error (E2BIG) is returned. If the number is more or equal
|
||||
to the number of Hyper-V feature leaves, the 'nent' field is adjusted to the
|
||||
number of valid entries in the 'entries' array, which is then filled.
|
||||
|
||||
'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
|
||||
userspace should not expect to get any particular value there.
|
||||
|
||||
5. The kvm_run structure
|
||||
------------------------
|
||||
|
||||
@ -4647,6 +4751,30 @@ and injected exceptions.
|
||||
* For the new DR6 bits, note that bit 16 is set iff the #DB exception
|
||||
will clear DR6.RTM.
|
||||
|
||||
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
|
||||
|
||||
Architectures: all
|
||||
Parameters: args[0] whether feature should be enabled or not
|
||||
|
||||
With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
|
||||
clear and write-protect all pages that are returned as dirty.
|
||||
Rather, userspace will have to do this operation separately using
|
||||
KVM_CLEAR_DIRTY_LOG.
|
||||
|
||||
At the cost of a slightly more complicated operation, this provides better
|
||||
scalability and responsiveness for two reasons. First,
|
||||
KVM_CLEAR_DIRTY_LOG ioctl can operate on a 64-page granularity rather
|
||||
than requiring to sync a full memslot; this ensures that KVM does not
|
||||
take spinlocks for an extended period of time. Second, in some cases a
|
||||
large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
|
||||
userspace actually using the data in the page. Pages can be modified
|
||||
during this time, which is inefficint for both the guest and userspace:
|
||||
the guest will incur a higher penalty due to write protection faults,
|
||||
while userspace can see false reports of dirty pages. Manual reprotection
|
||||
helps reducing this time, improving guest performance and reducing the
|
||||
number of dirty log false positives.
|
||||
|
||||
|
||||
8. Other capabilities.
|
||||
----------------------
|
||||
|
||||
|
@ -8309,6 +8309,7 @@ W: http://www.linux-kvm.org
|
||||
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
|
||||
S: Supported
|
||||
F: arch/x86/kvm/
|
||||
F: arch/x86/kvm/*/
|
||||
F: arch/x86/include/uapi/asm/kvm*
|
||||
F: arch/x86/include/asm/kvm*
|
||||
F: arch/x86/include/asm/pvclock-abi.h
|
||||
|
@ -23,6 +23,10 @@
|
||||
|
||||
#define ARM_EXIT_WITH_ABORT_BIT 31
|
||||
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_ABORT_BIT))
|
||||
#define ARM_EXCEPTION_IS_TRAP(x) \
|
||||
(ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_PREF_ABORT || \
|
||||
ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_DATA_ABORT || \
|
||||
ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_HVC)
|
||||
#define ARM_ABORT_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_ABORT_BIT))
|
||||
|
||||
#define ARM_EXCEPTION_RESET 0
|
||||
|
@ -225,7 +225,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
int kvm_unmap_hva_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end);
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
|
||||
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
|
||||
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
|
||||
@ -296,11 +296,6 @@ static inline void kvm_arm_init_debug(void) {}
|
||||
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
|
||||
static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *run)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr);
|
||||
|
@ -82,6 +82,67 @@ void kvm_clear_hyp_idmap(void);
|
||||
#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE)
|
||||
#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; })
|
||||
|
||||
#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
|
||||
#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
|
||||
#define kvm_pfn_pud(pfn, prot) (__pud(0))
|
||||
|
||||
#define kvm_pud_pfn(pud) ({ WARN_ON(1); 0; })
|
||||
|
||||
|
||||
#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
|
||||
/* No support for pud hugepages */
|
||||
#define kvm_pud_mkhuge(pud) ( {WARN_ON(1); pud; })
|
||||
|
||||
/*
|
||||
* The following kvm_*pud*() functions are provided strictly to allow
|
||||
* sharing code with arm64. They should never be called in practice.
|
||||
*/
|
||||
static inline void kvm_set_s2pud_readonly(pud_t *pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_readonly(pud_t *pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void kvm_set_pud(pud_t *pud, pud_t new_pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkexec(pud_t pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_exec(pud_t *pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
|
||||
{
|
||||
BUG();
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_young(pud_t pud)
|
||||
{
|
||||
WARN_ON(1);
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= L_PTE_S2_RDWR;
|
||||
|
@ -68,4 +68,12 @@ stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
|
||||
#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
|
||||
#define stage2_pud_table_empty(kvm, pudp) false
|
||||
|
||||
static inline bool kvm_stage2_has_pud(struct kvm *kvm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#define S2_PMD_MASK PMD_MASK
|
||||
#define S2_PMD_SIZE PMD_SIZE
|
||||
|
||||
#endif /* __ARM_S2_PGTABLE_H_ */
|
||||
|
@ -602,8 +602,8 @@ static int emulate_cp15(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
} else {
|
||||
/* If access function fails, it should complain. */
|
||||
kvm_err("Unsupported guest CP15 access at: %08lx\n",
|
||||
*vcpu_pc(vcpu));
|
||||
kvm_err("Unsupported guest CP15 access at: %08lx [%08lx]\n",
|
||||
*vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
|
||||
print_cp_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
}
|
||||
|
@ -107,7 +107,7 @@
|
||||
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
|
||||
|
||||
/* VTCR_EL2 Registers bits */
|
||||
#define VTCR_EL2_RES1 (1 << 31)
|
||||
#define VTCR_EL2_RES1 (1U << 31)
|
||||
#define VTCR_EL2_HD (1 << 22)
|
||||
#define VTCR_EL2_HA (1 << 21)
|
||||
#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
|
||||
@ -323,10 +323,6 @@
|
||||
#define PAR_TO_HPFAR(par) \
|
||||
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
|
||||
|
||||
#define kvm_arm_exception_type \
|
||||
{0, "IRQ" }, \
|
||||
{1, "TRAP" }
|
||||
|
||||
#define ECN(x) { ESR_ELx_EC_##x, #x }
|
||||
|
||||
#define kvm_arm_exception_class \
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
#define ARM_EXIT_WITH_SERROR_BIT 31
|
||||
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
|
||||
#define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP)
|
||||
#define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
|
||||
|
||||
#define ARM_EXCEPTION_IRQ 0
|
||||
@ -34,6 +35,12 @@
|
||||
/* The hyp-stub will return this for any kvm_call_hyp() call */
|
||||
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
|
||||
|
||||
#define kvm_arm_exception_type \
|
||||
{ARM_EXCEPTION_IRQ, "IRQ" }, \
|
||||
{ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \
|
||||
{ARM_EXCEPTION_TRAP, "TRAP" }, \
|
||||
{ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/mm.h>
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/esr.h>
|
||||
#include <asm/kvm_arm.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
@ -147,14 +148,6 @@ static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
|
||||
{
|
||||
if (vcpu_mode_is_32bit(vcpu))
|
||||
kvm_skip_instr32(vcpu, is_wide_instr);
|
||||
else
|
||||
*vcpu_pc(vcpu) += 4;
|
||||
}
|
||||
|
||||
static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
*vcpu_cpsr(vcpu) |= PSR_AA32_T_BIT;
|
||||
@ -424,4 +417,30 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
|
||||
return data; /* Leave LE untouched */
|
||||
}
|
||||
|
||||
static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
|
||||
{
|
||||
if (vcpu_mode_is_32bit(vcpu))
|
||||
kvm_skip_instr32(vcpu, is_wide_instr);
|
||||
else
|
||||
*vcpu_pc(vcpu) += 4;
|
||||
|
||||
/* advance the singlestep state machine */
|
||||
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip an instruction which has been emulated at hyp while most guest sysregs
|
||||
* are live.
|
||||
*/
|
||||
static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
*vcpu_pc(vcpu) = read_sysreg_el2(elr);
|
||||
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
|
||||
|
||||
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
|
||||
|
||||
write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
|
||||
write_sysreg_el2(*vcpu_pc(vcpu), elr);
|
||||
}
|
||||
|
||||
#endif /* __ARM64_KVM_EMULATE_H__ */
|
||||
|
@ -319,7 +319,7 @@ struct kvm_vcpu_arch {
|
||||
*/
|
||||
#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
|
||||
|
||||
u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
|
||||
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
|
||||
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
|
||||
|
||||
/*
|
||||
@ -360,7 +360,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
int kvm_unmap_hva_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end);
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
||||
|
||||
@ -449,7 +449,6 @@ void kvm_arm_init_debug(void);
|
||||
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
|
||||
bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr);
|
||||
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
|
||||
|
@ -184,6 +184,17 @@ void kvm_clear_hyp_idmap(void);
|
||||
#define kvm_mk_pgd(pudp) \
|
||||
__pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
|
||||
|
||||
#define kvm_set_pud(pudp, pud) set_pud(pudp, pud)
|
||||
|
||||
#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
|
||||
#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
|
||||
#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot)
|
||||
|
||||
#define kvm_pud_pfn(pud) pud_pfn(pud)
|
||||
|
||||
#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
|
||||
#define kvm_pud_mkhuge(pud) pud_mkhuge(pud)
|
||||
|
||||
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
|
||||
{
|
||||
pte_val(pte) |= PTE_S2_RDWR;
|
||||
@ -196,6 +207,12 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
|
||||
return pmd;
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
|
||||
{
|
||||
pud_val(pud) |= PUD_S2_RDWR;
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline pte_t kvm_s2pte_mkexec(pte_t pte)
|
||||
{
|
||||
pte_val(pte) &= ~PTE_S2_XN;
|
||||
@ -208,6 +225,12 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
|
||||
return pmd;
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkexec(pud_t pud)
|
||||
{
|
||||
pud_val(pud) &= ~PUD_S2_XN;
|
||||
return pud;
|
||||
}
|
||||
|
||||
static inline void kvm_set_s2pte_readonly(pte_t *ptep)
|
||||
{
|
||||
pteval_t old_pteval, pteval;
|
||||
@ -246,6 +269,31 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
|
||||
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
|
||||
}
|
||||
|
||||
static inline void kvm_set_s2pud_readonly(pud_t *pudp)
|
||||
{
|
||||
kvm_set_s2pte_readonly((pte_t *)pudp);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_readonly(pud_t *pudp)
|
||||
{
|
||||
return kvm_s2pte_readonly((pte_t *)pudp);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_exec(pud_t *pudp)
|
||||
{
|
||||
return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN);
|
||||
}
|
||||
|
||||
static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
|
||||
{
|
||||
return pud_mkyoung(pud);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2pud_young(pud_t pud)
|
||||
{
|
||||
return pud_young(pud);
|
||||
}
|
||||
|
||||
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
|
||||
|
||||
#ifdef __PAGETABLE_PMD_FOLDED
|
||||
|
@ -193,6 +193,10 @@
|
||||
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
|
||||
#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
|
||||
|
||||
#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */
|
||||
#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */
|
||||
#define PUD_S2_XN (_AT(pudval_t, 2) << 53) /* XN[1:0] */
|
||||
|
||||
/*
|
||||
* Memory Attribute override for Stage-2 (MemAttr[3:0])
|
||||
*/
|
||||
|
@ -315,6 +315,11 @@ static inline pte_t pud_pte(pud_t pud)
|
||||
return __pte(pud_val(pud));
|
||||
}
|
||||
|
||||
static inline pud_t pte_pud(pte_t pte)
|
||||
{
|
||||
return __pud(pte_val(pte));
|
||||
}
|
||||
|
||||
static inline pmd_t pud_pmd(pud_t pud)
|
||||
{
|
||||
return __pmd(pud_val(pud));
|
||||
@ -382,8 +387,12 @@ static inline int pmd_protnone(pmd_t pmd)
|
||||
#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
|
||||
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
|
||||
|
||||
#define pud_young(pud) pte_young(pud_pte(pud))
|
||||
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
|
||||
#define pud_write(pud) pte_write(pud_pte(pud))
|
||||
|
||||
#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
|
||||
|
||||
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
|
||||
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
|
||||
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
|
||||
|
@ -30,16 +30,14 @@
|
||||
#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
|
||||
|
||||
/*
|
||||
* The hardware supports concatenation of up to 16 tables at stage2 entry level
|
||||
* and we use the feature whenever possible.
|
||||
* The hardware supports concatenation of up to 16 tables at stage2 entry
|
||||
* level and we use the feature whenever possible, which means we resolve 4
|
||||
* additional bits of address at the entry level.
|
||||
*
|
||||
* Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3).
|
||||
* On arm64, the smallest PAGE_SIZE supported is 4k, which means
|
||||
* (PAGE_SHIFT - 3) > 4 holds for all page sizes.
|
||||
* This implies, the total number of page table levels at stage2 expected
|
||||
* by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
|
||||
* in normal translations(e.g, stage1), since we cannot have another level in
|
||||
* the range (IPA_SHIFT, IPA_SHIFT - 4).
|
||||
* This implies, the total number of page table levels required for
|
||||
* IPA_SHIFT at stage2 expected by the hardware can be calculated using
|
||||
* the same logic used for the (non-collapsable) stage1 page tables but for
|
||||
* (IPA_SHIFT - 4).
|
||||
*/
|
||||
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
|
||||
#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
|
||||
|
@ -236,24 +236,3 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* After successfully emulating an instruction, we might want to
|
||||
* return to user space with a KVM_EXIT_DEBUG. We can only do this
|
||||
* once the emulation is complete, though, so for userspace emulations
|
||||
* we have to wait until we have re-entered KVM before calling this
|
||||
* helper.
|
||||
*
|
||||
* Return true (and set exit_reason) to return to userspace or false
|
||||
* if no further action is required.
|
||||
*/
|
||||
bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
|
||||
run->exit_reason = KVM_EXIT_DEBUG;
|
||||
run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -247,13 +247,6 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
handled = exit_handler(vcpu, run);
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run
|
||||
* structure if we need to return to userspace.
|
||||
*/
|
||||
if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run))
|
||||
handled = 0;
|
||||
|
||||
return handled;
|
||||
}
|
||||
|
||||
@ -287,12 +280,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
case ARM_EXCEPTION_IRQ:
|
||||
return 1;
|
||||
case ARM_EXCEPTION_EL1_SERROR:
|
||||
/* We may still need to return for single-step */
|
||||
if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
|
||||
&& kvm_arm_handle_step_debug(vcpu, run))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
return 1;
|
||||
case ARM_EXCEPTION_TRAP:
|
||||
return handle_trap_exceptions(vcpu, run);
|
||||
case ARM_EXCEPTION_HYP_GONE:
|
||||
|
@ -313,33 +313,6 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Skip an instruction which has been emulated. Returns true if
|
||||
* execution can continue or false if we need to exit hyp mode because
|
||||
* single-step was in effect.
|
||||
*/
|
||||
static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
*vcpu_pc(vcpu) = read_sysreg_el2(elr);
|
||||
|
||||
if (vcpu_mode_is_32bit(vcpu)) {
|
||||
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
|
||||
kvm_skip_instr32(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
|
||||
write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
|
||||
} else {
|
||||
*vcpu_pc(vcpu) += 4;
|
||||
}
|
||||
|
||||
write_sysreg_el2(*vcpu_pc(vcpu), elr);
|
||||
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
|
||||
vcpu->arch.fault.esr_el2 =
|
||||
(ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22;
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
|
||||
@ -428,20 +401,12 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
if (valid) {
|
||||
int ret = __vgic_v2_perform_cpuif_access(vcpu);
|
||||
|
||||
if (ret == 1 && __skip_instr(vcpu))
|
||||
if (ret == 1)
|
||||
return true;
|
||||
|
||||
if (ret == -1) {
|
||||
/* Promote an illegal access to an
|
||||
* SError. If we would be returning
|
||||
* due to single-step clear the SS
|
||||
* bit so handle_exit knows what to
|
||||
* do after dealing with the error.
|
||||
*/
|
||||
if (!__skip_instr(vcpu))
|
||||
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
|
||||
/* Promote an illegal access to an SError.*/
|
||||
if (ret == -1)
|
||||
*exit_code = ARM_EXCEPTION_EL1_SERROR;
|
||||
}
|
||||
|
||||
goto exit;
|
||||
}
|
||||
@ -452,7 +417,7 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
|
||||
int ret = __vgic_v3_perform_cpuif_access(vcpu);
|
||||
|
||||
if (ret == 1 && __skip_instr(vcpu))
|
||||
if (ret == 1)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
|
||||
* Returns:
|
||||
* 1: GICV access successfully performed
|
||||
* 0: Not a GICV access
|
||||
* -1: Illegal GICV access
|
||||
* -1: Illegal GICV access successfully performed
|
||||
*/
|
||||
int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -61,12 +61,16 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
||||
return 0;
|
||||
|
||||
/* Reject anything but a 32bit access */
|
||||
if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
|
||||
if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) {
|
||||
__kvm_skip_instr(vcpu);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Not aligned? Don't bother */
|
||||
if (fault_ipa & 3)
|
||||
if (fault_ipa & 3) {
|
||||
__kvm_skip_instr(vcpu);
|
||||
return -1;
|
||||
}
|
||||
|
||||
rd = kvm_vcpu_dabt_get_rd(vcpu);
|
||||
addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
|
||||
@ -88,5 +92,7 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
||||
vcpu_set_reg(vcpu, rd, data);
|
||||
}
|
||||
|
||||
__kvm_skip_instr(vcpu);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -76,7 +76,7 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
|
||||
return false;
|
||||
}
|
||||
|
||||
u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
|
||||
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
|
||||
{
|
||||
if (!vcpu->arch.sysregs_loaded_on_cpu)
|
||||
goto immediate_read;
|
||||
@ -1858,6 +1858,8 @@ static void perform_access(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);
|
||||
|
||||
/*
|
||||
* Not having an accessor means that we have configured a trap
|
||||
* that we don't know how to handle. This certainly qualifies
|
||||
@ -1920,8 +1922,8 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
kvm_err("Unsupported guest CP%d access at: %08lx\n",
|
||||
cp, *vcpu_pc(vcpu));
|
||||
kvm_err("Unsupported guest CP%d access at: %08lx [%08lx]\n",
|
||||
cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
|
||||
print_sys_reg_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
}
|
||||
@ -2071,8 +2073,8 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
|
||||
if (likely(r)) {
|
||||
perform_access(vcpu, params, r);
|
||||
} else {
|
||||
kvm_err("Unsupported guest sys_reg access at: %lx\n",
|
||||
*vcpu_pc(vcpu));
|
||||
kvm_err("Unsupported guest sys_reg access at: %lx [%08lx]\n",
|
||||
*vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
|
||||
print_sys_reg_instr(params);
|
||||
kvm_inject_undefined(vcpu);
|
||||
}
|
||||
|
@ -35,6 +35,9 @@ struct sys_reg_params {
|
||||
};
|
||||
|
||||
struct sys_reg_desc {
|
||||
/* Sysreg string for debug */
|
||||
const char *name;
|
||||
|
||||
/* MRS/MSR instruction which accesses it. */
|
||||
u8 Op0;
|
||||
u8 Op1;
|
||||
@ -130,6 +133,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
|
||||
#define Op2(_x) .Op2 = _x
|
||||
|
||||
#define SYS_DESC(reg) \
|
||||
.name = #reg, \
|
||||
Op0(sys_reg_Op0(reg)), Op1(sys_reg_Op1(reg)), \
|
||||
CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
|
||||
Op2(sys_reg_Op2(reg))
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define _TRACE_ARM64_KVM_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include "sys_regs.h"
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM kvm
|
||||
@ -152,6 +153,40 @@ TRACE_EVENT(kvm_handle_sys_reg,
|
||||
TP_printk("HSR 0x%08lx", __entry->hsr)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_sys_access,
|
||||
TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg),
|
||||
TP_ARGS(vcpu_pc, params, reg),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, vcpu_pc)
|
||||
__field(bool, is_write)
|
||||
__field(const char *, name)
|
||||
__field(u8, Op0)
|
||||
__field(u8, Op1)
|
||||
__field(u8, CRn)
|
||||
__field(u8, CRm)
|
||||
__field(u8, Op2)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->vcpu_pc = vcpu_pc;
|
||||
__entry->is_write = params->is_write;
|
||||
__entry->name = reg->name;
|
||||
__entry->Op0 = reg->Op0;
|
||||
__entry->Op0 = reg->Op0;
|
||||
__entry->Op1 = reg->Op1;
|
||||
__entry->CRn = reg->CRn;
|
||||
__entry->CRm = reg->CRm;
|
||||
__entry->Op2 = reg->Op2;
|
||||
),
|
||||
|
||||
TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s",
|
||||
__entry->vcpu_pc, __entry->name ?: "UNKN",
|
||||
__entry->Op0, __entry->Op1, __entry->CRn,
|
||||
__entry->CRm, __entry->Op2,
|
||||
__entry->is_write ? "write" : "read")
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_set_guest_debug,
|
||||
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
|
||||
TP_ARGS(vcpu, guest_debug),
|
||||
|
@ -936,7 +936,7 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
int kvm_unmap_hva_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end);
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
||||
|
||||
|
@ -1004,14 +1004,37 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
bool is_dirty = false;
|
||||
bool flush = false;
|
||||
int r;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
|
||||
r = kvm_get_dirty_log_protect(kvm, log, &flush);
|
||||
|
||||
if (is_dirty) {
|
||||
if (flush) {
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, log->slot);
|
||||
|
||||
/* Let implementation handle TLB/GVA invalidation */
|
||||
kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
|
||||
}
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
bool flush = false;
|
||||
int r;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
r = kvm_clear_dirty_log_protect(kvm, log, &flush);
|
||||
|
||||
if (flush) {
|
||||
slots = kvm_memslots(kvm);
|
||||
memslot = id_to_memslot(slots, log->slot);
|
||||
|
||||
|
@ -551,7 +551,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
|
||||
(pte_dirty(old_pte) && !pte_dirty(hva_pte));
|
||||
}
|
||||
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
{
|
||||
unsigned long end = hva + PAGE_SIZE;
|
||||
int ret;
|
||||
@ -559,6 +559,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte);
|
||||
if (ret)
|
||||
kvm_mips_callbacks->flush_shadow_all(kvm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
|
||||
|
@ -335,6 +335,7 @@
|
||||
#define H_SET_PARTITION_TABLE 0xF800
|
||||
#define H_ENTER_NESTED 0xF804
|
||||
#define H_TLB_INVALIDATE 0xF808
|
||||
#define H_COPY_TOFROM_GUEST 0xF80C
|
||||
|
||||
/* Values for 2nd argument to H_SET_MODE */
|
||||
#define H_SET_MODE_RESOURCE_SET_CIABR 1
|
||||
|
@ -188,6 +188,13 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
|
||||
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
|
||||
struct kvm_vcpu *vcpu,
|
||||
unsigned long ea, unsigned long dsisr);
|
||||
extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
|
||||
gva_t eaddr, void *to, void *from,
|
||||
unsigned long n);
|
||||
extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
void *to, unsigned long n);
|
||||
extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
void *from, unsigned long n);
|
||||
extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, u64 root,
|
||||
u64 *pte_ret_p);
|
||||
@ -196,8 +203,11 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
int table_index, u64 *pte_ret_p);
|
||||
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, bool data, bool iswrite);
|
||||
extern void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
||||
unsigned int pshift, unsigned int lpid);
|
||||
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
|
||||
unsigned int shift, struct kvm_memory_slot *memslot,
|
||||
unsigned int shift,
|
||||
const struct kvm_memory_slot *memslot,
|
||||
unsigned int lpid);
|
||||
extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
|
||||
bool writing, unsigned long gpa,
|
||||
@ -215,16 +225,14 @@ extern int kvmppc_radix_init(void);
|
||||
extern void kvmppc_radix_exit(void);
|
||||
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
|
||||
unsigned long gpa, unsigned int shift,
|
||||
struct kvm_memory_slot *memslot,
|
||||
unsigned int lpid);
|
||||
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn);
|
||||
extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot, unsigned long *map);
|
||||
extern void kvmppc_radix_flush_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot);
|
||||
extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
|
||||
|
||||
/* XXX remove this export when load_last_inst() is generic */
|
||||
@ -242,7 +250,7 @@ extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
bool writing, bool *writable);
|
||||
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
|
||||
unsigned long *rmap, long pte_index, int realmode);
|
||||
extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
|
||||
extern void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn, unsigned long psize);
|
||||
extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
|
||||
unsigned long pte_index);
|
||||
@ -298,6 +306,7 @@ long kvmhv_nested_init(void);
|
||||
void kvmhv_nested_exit(void);
|
||||
void kvmhv_vm_nested_init(struct kvm *kvm);
|
||||
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
|
||||
long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
|
||||
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
|
||||
void kvmhv_release_all_nested(struct kvm *kvm);
|
||||
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
|
||||
@ -307,7 +316,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
|
||||
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
|
||||
void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
|
||||
struct hv_guest_state *hr);
|
||||
long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
|
||||
long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
|
||||
|
||||
|
@ -55,6 +55,7 @@ struct kvm_nested_guest {
|
||||
cpumask_t need_tlb_flush;
|
||||
cpumask_t cpu_in_guest;
|
||||
short prev_cpu[NR_CPUS];
|
||||
u8 radix; /* is this nested guest radix */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -150,6 +151,18 @@ static inline bool kvm_is_radix(struct kvm *kvm)
|
||||
return kvm->arch.radix;
|
||||
}
|
||||
|
||||
static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
bool radix;
|
||||
|
||||
if (vcpu->arch.nested)
|
||||
radix = vcpu->arch.nested->radix;
|
||||
else
|
||||
radix = kvm_is_radix(vcpu->kvm);
|
||||
|
||||
return radix;
|
||||
}
|
||||
|
||||
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
|
||||
#endif
|
||||
|
||||
@ -624,8 +637,11 @@ extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
|
||||
unsigned long *rmapp, struct rmap_nested **n_rmap);
|
||||
extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
|
||||
struct rmap_nested **n_rmap);
|
||||
extern void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
|
||||
unsigned long clr, unsigned long set,
|
||||
unsigned long hpa, unsigned long nbytes);
|
||||
extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot,
|
||||
const struct kvm_memory_slot *memslot,
|
||||
unsigned long gpa, unsigned long hpa,
|
||||
unsigned long nbytes);
|
||||
|
||||
|
@ -72,7 +72,7 @@ extern int kvm_unmap_hva_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end);
|
||||
extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
||||
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
|
||||
#define HPTEG_CACHE_NUM (1 << 15)
|
||||
#define HPTEG_HASH_BITS_PTE 13
|
||||
@ -793,6 +793,7 @@ struct kvm_vcpu_arch {
|
||||
/* For support of nested guests */
|
||||
struct kvm_nested_guest *nested;
|
||||
u32 nested_vcpu_id;
|
||||
gpa_t nested_io_gpr;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
|
||||
@ -827,6 +828,8 @@ struct kvm_vcpu_arch {
|
||||
#define KVM_MMIO_REG_FQPR 0x00c0
|
||||
#define KVM_MMIO_REG_VSX 0x0100
|
||||
#define KVM_MMIO_REG_VMX 0x0180
|
||||
#define KVM_MMIO_REG_NESTED_GPR 0xffc0
|
||||
|
||||
|
||||
#define __KVM_HAVE_ARCH_WQP
|
||||
#define __KVM_HAVE_CREATE_DEVICE
|
||||
|
@ -224,7 +224,8 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new);
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change);
|
||||
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
|
||||
struct kvm_ppc_smmu_info *info);
|
||||
extern void kvmppc_core_flush_memslot(struct kvm *kvm,
|
||||
@ -294,7 +295,8 @@ struct kvmppc_ops {
|
||||
void (*commit_memory_region)(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new);
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change);
|
||||
int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
|
||||
unsigned long end);
|
||||
int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
@ -326,6 +328,10 @@ struct kvmppc_ops {
|
||||
unsigned long flags);
|
||||
void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
|
||||
int (*enable_nested)(struct kvm *kvm);
|
||||
int (*load_from_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
|
||||
int size);
|
||||
int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
|
||||
int size);
|
||||
};
|
||||
|
||||
extern struct kvmppc_ops *kvmppc_hv_ops;
|
||||
|
@ -995,7 +995,16 @@ EXC_COMMON_BEGIN(h_data_storage_common)
|
||||
bl save_nvgprs
|
||||
RECONCILE_IRQ_STATE(r10, r11)
|
||||
addi r3,r1,STACK_FRAME_OVERHEAD
|
||||
BEGIN_MMU_FTR_SECTION
|
||||
ld r4,PACA_EXGEN+EX_DAR(r13)
|
||||
lwz r5,PACA_EXGEN+EX_DSISR(r13)
|
||||
std r4,_DAR(r1)
|
||||
std r5,_DSISR(r1)
|
||||
li r5,SIGSEGV
|
||||
bl bad_page_fault
|
||||
MMU_FTR_SECTION_ELSE
|
||||
bl unknown_exception
|
||||
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
|
||||
b ret_from_except
|
||||
|
||||
|
||||
|
@ -830,9 +830,10 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
||||
void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
||||
const struct kvm_userspace_memory_region *mem,
|
||||
const struct kvm_memory_slot *old,
|
||||
const struct kvm_memory_slot *new)
|
||||
const struct kvm_memory_slot *new,
|
||||
enum kvm_mr_change change)
|
||||
{
|
||||
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
|
||||
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
|
||||
}
|
||||
|
||||
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
@ -850,9 +851,10 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
|
||||
return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
|
||||
}
|
||||
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
{
|
||||
kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
|
||||
|
@ -743,12 +743,15 @@ void kvmppc_rmap_reset(struct kvm *kvm)
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
slots = kvm_memslots(kvm);
|
||||
kvm_for_each_memslot(memslot, slots) {
|
||||
/* Mutual exclusion with kvm_unmap_hva_range etc. */
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
/*
|
||||
* This assumes it is acceptable to lose reference and
|
||||
* change bits across a reset.
|
||||
*/
|
||||
memset(memslot->arch.rmap, 0,
|
||||
memslot->npages * sizeof(*memslot->arch.rmap));
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
}
|
||||
@ -896,11 +899,12 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
|
||||
|
||||
gfn = memslot->base_gfn;
|
||||
rmapp = memslot->arch.rmap;
|
||||
if (kvm_is_radix(kvm)) {
|
||||
kvmppc_radix_flush_memslot(kvm, memslot);
|
||||
return;
|
||||
}
|
||||
|
||||
for (n = memslot->npages; n; --n, ++gfn) {
|
||||
if (kvm_is_radix(kvm)) {
|
||||
kvm_unmap_radix(kvm, memslot, gfn);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Testing the present bit without locking is OK because
|
||||
* the memslot has been marked invalid already, and hence
|
||||
|
@ -29,6 +29,103 @@
|
||||
*/
|
||||
static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
|
||||
|
||||
unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
|
||||
gva_t eaddr, void *to, void *from,
|
||||
unsigned long n)
|
||||
{
|
||||
unsigned long quadrant, ret = n;
|
||||
int old_pid, old_lpid;
|
||||
bool is_load = !!to;
|
||||
|
||||
/* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
|
||||
if (kvmhv_on_pseries())
|
||||
return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
|
||||
__pa(to), __pa(from), n);
|
||||
|
||||
quadrant = 1;
|
||||
if (!pid)
|
||||
quadrant = 2;
|
||||
if (is_load)
|
||||
from = (void *) (eaddr | (quadrant << 62));
|
||||
else
|
||||
to = (void *) (eaddr | (quadrant << 62));
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/* switch the lpid first to avoid running host with unallocated pid */
|
||||
old_lpid = mfspr(SPRN_LPID);
|
||||
if (old_lpid != lpid)
|
||||
mtspr(SPRN_LPID, lpid);
|
||||
if (quadrant == 1) {
|
||||
old_pid = mfspr(SPRN_PID);
|
||||
if (old_pid != pid)
|
||||
mtspr(SPRN_PID, pid);
|
||||
}
|
||||
isync();
|
||||
|
||||
pagefault_disable();
|
||||
if (is_load)
|
||||
ret = raw_copy_from_user(to, from, n);
|
||||
else
|
||||
ret = raw_copy_to_user(to, from, n);
|
||||
pagefault_enable();
|
||||
|
||||
/* switch the pid first to avoid running host with unallocated pid */
|
||||
if (quadrant == 1 && pid != old_pid)
|
||||
mtspr(SPRN_PID, old_pid);
|
||||
if (lpid != old_lpid)
|
||||
mtspr(SPRN_LPID, old_lpid);
|
||||
isync();
|
||||
|
||||
preempt_enable();
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix);
|
||||
|
||||
static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
void *to, void *from, unsigned long n)
|
||||
{
|
||||
int lpid = vcpu->kvm->arch.lpid;
|
||||
int pid = vcpu->arch.pid;
|
||||
|
||||
/* This would cause a data segment intr so don't allow the access */
|
||||
if (eaddr & (0x3FFUL << 52))
|
||||
return -EINVAL;
|
||||
|
||||
/* Should we be using the nested lpid */
|
||||
if (vcpu->arch.nested)
|
||||
lpid = vcpu->arch.nested->shadow_lpid;
|
||||
|
||||
/* If accessing quadrant 3 then pid is expected to be 0 */
|
||||
if (((eaddr >> 62) & 0x3) == 0x3)
|
||||
pid = 0;
|
||||
|
||||
eaddr &= ~(0xFFFUL << 52);
|
||||
|
||||
return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n);
|
||||
}
|
||||
|
||||
long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
|
||||
unsigned long n)
|
||||
{
|
||||
long ret;
|
||||
|
||||
ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
|
||||
if (ret > 0)
|
||||
memset(to + (n - ret), 0, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix);
|
||||
|
||||
long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
|
||||
unsigned long n)
|
||||
{
|
||||
return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix);
|
||||
|
||||
int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
struct kvmppc_pte *gpte, u64 root,
|
||||
u64 *pte_ret_p)
|
||||
@ -197,8 +294,8 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
||||
unsigned int pshift, unsigned int lpid)
|
||||
void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
|
||||
unsigned int pshift, unsigned int lpid)
|
||||
{
|
||||
unsigned long psize = PAGE_SIZE;
|
||||
int psi;
|
||||
@ -284,7 +381,8 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
|
||||
|
||||
/* Called with kvm->mmu_lock held */
|
||||
void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
|
||||
unsigned int shift, struct kvm_memory_slot *memslot,
|
||||
unsigned int shift,
|
||||
const struct kvm_memory_slot *memslot,
|
||||
unsigned int lpid)
|
||||
|
||||
{
|
||||
@ -683,6 +781,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
|
||||
pte_t pte, *ptep;
|
||||
unsigned int shift, level;
|
||||
int ret;
|
||||
bool large_enable;
|
||||
|
||||
/* used to check for invalidations in progress */
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
@ -732,12 +831,15 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
|
||||
pte = *ptep;
|
||||
local_irq_enable();
|
||||
|
||||
/* If we're logging dirty pages, always map single pages */
|
||||
large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES);
|
||||
|
||||
/* Get pte level from shift/size */
|
||||
if (shift == PUD_SHIFT &&
|
||||
if (large_enable && shift == PUD_SHIFT &&
|
||||
(gpa & (PUD_SIZE - PAGE_SIZE)) ==
|
||||
(hva & (PUD_SIZE - PAGE_SIZE))) {
|
||||
level = 2;
|
||||
} else if (shift == PMD_SHIFT &&
|
||||
} else if (large_enable && shift == PMD_SHIFT &&
|
||||
(gpa & (PMD_SIZE - PAGE_SIZE)) ==
|
||||
(hva & (PMD_SIZE - PAGE_SIZE))) {
|
||||
level = 1;
|
||||
@ -857,7 +959,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Called with kvm->lock held */
|
||||
/* Called with kvm->mmu_lock held */
|
||||
int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
@ -872,7 +974,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called with kvm->lock held */
|
||||
/* Called with kvm->mmu_lock held */
|
||||
int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
@ -880,18 +982,24 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gpa = gfn << PAGE_SHIFT;
|
||||
unsigned int shift;
|
||||
int ref = 0;
|
||||
unsigned long old, *rmapp;
|
||||
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
|
||||
kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
|
||||
gpa, shift);
|
||||
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
|
||||
gpa, shift);
|
||||
/* XXX need to flush tlb here? */
|
||||
/* Also clear bit in ptes in shadow pgtable for nested guests */
|
||||
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0,
|
||||
old & PTE_RPN_MASK,
|
||||
1UL << shift);
|
||||
ref = 1;
|
||||
}
|
||||
return ref;
|
||||
}
|
||||
|
||||
/* Called with kvm->lock held */
|
||||
/* Called with kvm->mmu_lock held */
|
||||
int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
||||
unsigned long gfn)
|
||||
{
|
||||
@ -915,15 +1023,23 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
|
||||
pte_t *ptep;
|
||||
unsigned int shift;
|
||||
int ret = 0;
|
||||
unsigned long old, *rmapp;
|
||||
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
|
||||
ret = 1;
|
||||
if (shift)
|
||||
ret = 1 << (shift - PAGE_SHIFT);
|
||||
kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
|
||||
gpa, shift);
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
|
||||
gpa, shift);
|
||||
kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
|
||||
/* Also clear bit in ptes in shadow pgtable for nested guests */
|
||||
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
|
||||
kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_DIRTY, 0,
|
||||
old & PTE_RPN_MASK,
|
||||
1UL << shift);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -953,6 +1069,26 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvmppc_radix_flush_memslot(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
unsigned long n;
|
||||
pte_t *ptep;
|
||||
unsigned long gpa;
|
||||
unsigned int shift;
|
||||
|
||||
gpa = memslot->base_gfn << PAGE_SHIFT;
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
for (n = memslot->npages; n; --n) {
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep))
|
||||
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
|
||||
kvm->arch.lpid);
|
||||
gpa += PAGE_SIZE;
|
||||
}
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
|
||||
int psize, int *indexp)
|
||||
{
|
||||
|
@@ -985,6 +985,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_set_gpr(vcpu, 3, 0);
vcpu->arch.hcall_needed = 0;
return -EINTR;
} else if (ret == H_TOO_HARD) {
kvmppc_set_gpr(vcpu, 3, 0);
vcpu->arch.hcall_needed = 0;
return RESUME_HOST;
}
break;
case H_TLB_INVALIDATE:
@@ -992,7 +996,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (nesting_enabled(vcpu->kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;

case H_COPY_TOFROM_GUEST:
ret = H_FUNCTION;
if (nesting_enabled(vcpu->kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
default:
return RESUME_HOST;
}
@@ -1336,7 +1344,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}

static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
@@ -1394,7 +1402,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
*/
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvmhv_nested_page_fault(vcpu);
r = kvmhv_nested_page_fault(run, vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
@@ -1404,7 +1412,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvmhv_nested_page_fault(vcpu);
r = kvmhv_nested_page_fault(run, vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;

@@ -4059,7 +4067,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
if (!nested)
r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
else
r = kvmppc_handle_nested_exit(vcpu);
r = kvmppc_handle_nested_exit(kvm_run, vcpu);
}
vcpu->arch.ret = r;

@@ -4371,7 +4379,8 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
const struct kvm_memory_slot *new)
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
unsigned long npages = mem->memory_size >> PAGE_SHIFT;

@@ -4383,6 +4392,23 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
*/
if (npages)
atomic64_inc(&kvm->arch.mmio_update);

/*
* For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels
* have already called kvm_arch_flush_shadow_memslot() to
* flush shadow mappings. For KVM_MR_CREATE we have no
* previous mappings. So the only case to handle is
* KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit
* has been changed.
* For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES
* to get rid of any THP PTEs in the partition-scoped page tables
* so we can track dirtiness at the page level; we flush when
* clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to
* using THP PTEs.
*/
if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
kvmppc_radix_flush_memslot(kvm, old);
}
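
The flag test added above fires only when the KVM_MEM_LOG_DIRTY_PAGES bit actually changes, in either direction; a tiny stand-alone illustration of the XOR-based change detection (the flag value here is hypothetical, not the real KVM_MEM_LOG_DIRTY_PAGES):

#include <stdio.h>

#define EXAMPLE_LOG_DIRTY_PAGES (1u << 1)	/* illustrative flag bit */

/* True when the dirty-logging bit differs between the old and new flags. */
static int dirty_log_toggled(unsigned int old_flags, unsigned int new_flags)
{
	return !!((new_flags ^ old_flags) & EXAMPLE_LOG_DIRTY_PAGES);
}

int main(void)
{
	printf("%d\n", dirty_log_toggled(0, EXAMPLE_LOG_DIRTY_PAGES));	/* 1: enabled  */
	printf("%d\n", dirty_log_toggled(EXAMPLE_LOG_DIRTY_PAGES, 0));	/* 1: disabled */
	printf("%d\n", dirty_log_toggled(0, 0));			/* 0: no change */
	return 0;
}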

/*
@@ -4532,12 +4558,15 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
{
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
kvmppc_rmap_reset(kvm);
kvm->arch.process_table = 0;
/* Mutual exclusion with kvm_unmap_hva_range etc. */
spin_lock(&kvm->mmu_lock);
kvm->arch.radix = 0;
spin_unlock(&kvm->mmu_lock);
kvmppc_free_radix(kvm);
kvmppc_update_lpcr(kvm, LPCR_VPM1,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
kvmppc_rmap_reset(kvm);
kvm->arch.radix = 0;
kvm->arch.process_table = 0;
return 0;
}

@@ -4549,12 +4578,14 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
err = kvmppc_init_vm_radix(kvm);
if (err)
return err;

kvmppc_rmap_reset(kvm);
/* Mutual exclusion with kvm_unmap_hva_range etc. */
spin_lock(&kvm->mmu_lock);
kvm->arch.radix = 1;
spin_unlock(&kvm->mmu_lock);
kvmppc_free_hpt(&kvm->arch.hpt);
kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
kvmppc_rmap_reset(kvm);
kvm->arch.radix = 1;
return 0;
}

@@ -5214,6 +5245,44 @@ static int kvmhv_enable_nested(struct kvm *kvm)
return 0;
}

static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
int size)
{
int rc = -EINVAL;

if (kvmhv_vcpu_is_radix(vcpu)) {
rc = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size);

if (rc > 0)
rc = -EINVAL;
}

/* For now quadrants are the only way to access nested guest memory */
if (rc && vcpu->arch.nested)
rc = -EAGAIN;

return rc;
}

static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
int size)
{
int rc = -EINVAL;

if (kvmhv_vcpu_is_radix(vcpu)) {
rc = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size);

if (rc > 0)
rc = -EINVAL;
}

/* For now quadrants are the only way to access nested guest memory */
if (rc && vcpu->arch.nested)
rc = -EAGAIN;

return rc;
}

static struct kvmppc_ops kvm_ops_hv = {
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5254,6 +5323,8 @@ static struct kvmppc_ops kvm_ops_hv = {
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
.enable_nested = kvmhv_enable_nested,
.load_from_eaddr = kvmhv_load_from_eaddr,
.store_to_eaddr = kvmhv_store_to_eaddr,
};

static int kvm_init_subcore_bitmap(void)
@@ -195,6 +195,26 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
vcpu->arch.ppr = hr->ppr;
}

static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
{
/* No need to reflect the page fault to L1, we've handled it */
vcpu->arch.trap = 0;

/*
* Since the L2 gprs have already been written back into L1 memory when
* we complete the mmio, store the L1 memory location of the L2 gpr
* being loaded into by the mmio so that the loaded value can be
* written there in kvmppc_complete_mmio_load()
*/
if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR)
&& (vcpu->mmio_is_write == 0)) {
vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
offsetof(struct pt_regs,
gpr[vcpu->arch.io_gpr]);
vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
}
}
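
For illustration of the address stored in vcpu->arch.nested_io_gpr above: it is the L1 guest address of the saved L2 GPR, i.e. regs_ptr plus the offset of that GPR inside struct pt_regs. A stand-alone sketch under the assumption of an LP64 ABI and a simplified register-frame layout (not the kernel's struct pt_regs):

#include <stdio.h>
#include <stddef.h>

/* Illustrative stand-in for the kernel's register frame; only the GPR
 * array matters for this example. */
struct pt_regs_example {
	unsigned long gpr[32];
	unsigned long nip;
	unsigned long msr;
};

/* L1 address holding L2's GPR 'n', given the L1 address of the frame
 * (the kernel writes this as offsetof(struct pt_regs, gpr[n])). */
static unsigned long long nested_gpr_addr(unsigned long long regs_ptr, unsigned int n)
{
	return regs_ptr + offsetof(struct pt_regs_example, gpr) +
	       (unsigned long long)n * sizeof(unsigned long);
}

int main(void)
{
	/* GPR 4 lives 4 * sizeof(unsigned long) bytes into the array. */
	printf("%#llx\n", nested_gpr_addr(0x10000, 4));	/* 0x10020 on LP64 */
	return 0;
}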

long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
long int err, r;
@@ -316,6 +336,11 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
if (r == -EINTR)
return H_INTERRUPT;

if (vcpu->mmio_needed) {
kvmhv_nested_mmio_needed(vcpu, regs_ptr);
return H_TOO_HARD;
}

return vcpu->arch.trap;
}

@@ -436,6 +461,81 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
return ret;
}

/*
* Handle the H_COPY_TOFROM_GUEST hcall.
* r4 = L1 lpid of nested guest
* r5 = pid
* r6 = eaddr to access
* r7 = to buffer (L1 gpa)
* r8 = from buffer (L1 gpa)
* r9 = n bytes to copy
*/
long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
{
struct kvm_nested_guest *gp;
int l1_lpid = kvmppc_get_gpr(vcpu, 4);
int pid = kvmppc_get_gpr(vcpu, 5);
gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
void *buf;
unsigned long n = kvmppc_get_gpr(vcpu, 9);
bool is_load = !!gp_to;
long rc;

if (gp_to && gp_from) /* One must be NULL to determine the direction */
return H_PARAMETER;

if (eaddr & (0xFFFUL << 52))
return H_PARAMETER;

buf = kzalloc(n, GFP_KERNEL);
if (!buf)
return H_NO_MEM;

gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
if (!gp) {
rc = H_PARAMETER;
goto out_free;
}

mutex_lock(&gp->tlb_lock);

if (is_load) {
/* Load from the nested guest into our buffer */
rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
eaddr, buf, NULL, n);
if (rc)
goto not_found;

/* Write what was loaded into our buffer back to the L1 guest */
rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
if (rc)
goto not_found;
} else {
/* Load the data to be stored from the L1 guest into our buf */
rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
if (rc)
goto not_found;

/* Store from our buffer into the nested guest */
rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
eaddr, NULL, buf, n);
if (rc)
goto not_found;
}

out_unlock:
mutex_unlock(&gp->tlb_lock);
kvmhv_put_nested(gp);
out_free:
kfree(buf);
return rc;
not_found:
rc = H_NOT_FOUND;
goto out_unlock;
}
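
A minimal sketch of the direction selection used by kvmhv_copy_tofrom_guest_nested() above: exactly one of the "to" and "from" L1 buffer addresses may be non-zero, and the non-zero one decides whether the hcall loads from or stores to the nested guest (stand-alone illustration, not the kernel implementation):

#include <stdbool.h>
#include <stdio.h>

/* Returns 0 on success and sets *is_load; nonzero mirrors the H_PARAMETER
 * rejection when both buffers are supplied. */
static int copy_direction(unsigned long long gp_to, unsigned long long gp_from,
			  bool *is_load)
{
	if (gp_to && gp_from)	/* one must be NULL to determine the direction */
		return -1;
	*is_load = gp_to != 0;	/* a "to" buffer means load from the L2 */
	return 0;
}

int main(void)
{
	bool is_load;

	if (copy_direction(0x2000, 0, &is_load) == 0)
		printf("is_load = %d\n", is_load);	/* is_load = 1 */
	return 0;
}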

/*
* Reload the partition table entry for a guest.
* Caller must hold gp->tlb_lock.
@@ -480,6 +580,7 @@ struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
if (shadow_lpid < 0)
goto out_free2;
gp->shadow_lpid = shadow_lpid;
gp->radix = 1;

memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));

@@ -687,6 +788,57 @@ void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
*n_rmap = NULL;
}

static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
unsigned long clr, unsigned long set,
unsigned long hpa, unsigned long mask)
{
struct kvm_nested_guest *gp;
unsigned long gpa;
unsigned int shift, lpid;
pte_t *ptep;

gpa = n_rmap & RMAP_NESTED_GPA_MASK;
lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
gp = kvmhv_find_nested(kvm, lpid);
if (!gp)
return;

/* Find the pte */
ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
/*
* If the pte is present and the pfn is still the same, update the pte.
* If the pfn has changed then this is a stale rmap entry, the nested
* gpa actually points somewhere else now, and there is nothing to do.
* XXX A future optimisation would be to remove the rmap entry here.
*/
if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
__radix_pte_update(ptep, clr, set);
kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
}
}

/*
* For a given list of rmap entries, update the rc bits in all ptes in shadow
* page tables for nested guests which are referenced by the rmap list.
*/
void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
unsigned long clr, unsigned long set,
unsigned long hpa, unsigned long nbytes)
{
struct llist_node *entry = ((struct llist_head *) rmapp)->first;
struct rmap_nested *cursor;
unsigned long rmap, mask;

if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
return;

mask = PTE_RPN_MASK & ~(nbytes - 1);
hpa &= mask;

for_each_nest_rmap_safe(cursor, entry, &rmap)
kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
}
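
The mask computed in kvmhv_update_nest_rmap_rc_list() above clears the offset bits within one mapping, so any PTE belonging to the same mapping compares equal to the passed hpa; a small stand-alone illustration with a hypothetical RPN mask (the real PTE_RPN_MASK depends on the MMU configuration):

#include <stdio.h>

/* Hypothetical stand-in for the kernel's PTE_RPN_MASK (bits 12..47 here). */
#define EXAMPLE_RPN_MASK 0x0000fffffffff000ULL

int main(void)
{
	unsigned long long nbytes = 1ULL << 24;			/* 16 MiB mapping */
	unsigned long long mask = EXAMPLE_RPN_MASK & ~(nbytes - 1);
	unsigned long long hpa = 0x123456789ULL & mask;		/* mapping base   */

	/* Any address inside the same 16 MiB mapping compares equal. */
	printf("%d\n", (0x123456789ULL & mask) == hpa);		/* 1 */
	printf("%d\n", (0x987654321ULL & mask) == hpa);		/* 0 */
	return 0;
}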

static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
unsigned long hpa, unsigned long mask)
{
@@ -723,7 +875,7 @@ static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,

/* called with kvm->mmu_lock held */
void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
struct kvm_memory_slot *memslot,
const struct kvm_memory_slot *memslot,
unsigned long gpa, unsigned long hpa,
unsigned long nbytes)
{
@@ -1049,7 +1201,7 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
struct kvm *kvm = vcpu->kvm;
bool writing = !!(dsisr & DSISR_ISSTORE);
u64 pgflags;
bool ret;
long ret;

/* Are the rc bits set in the L1 partition scoped pte? */
pgflags = _PAGE_ACCESSED;
@@ -1062,16 +1214,22 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
gpte.raddr, kvm->arch.lpid);
spin_unlock(&kvm->mmu_lock);
if (!ret)
return -EINVAL;
if (!ret) {
ret = -EINVAL;
goto out_unlock;
}

/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
gp->shadow_lpid);
if (!ret)
return -EINVAL;
return 0;
ret = -EINVAL;
else
ret = 0;

out_unlock:
spin_unlock(&kvm->mmu_lock);
return ret;
}

static inline int kvmppc_radix_level_to_shift(int level)
@@ -1099,7 +1257,8 @@ static inline int kvmppc_radix_shift_to_level(int shift)
}

/* called with gp->tlb_lock held */
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
static long int __kvmhv_nested_page_fault(struct kvm_run *run,
struct kvm_vcpu *vcpu,
struct kvm_nested_guest *gp)
{
struct kvm *kvm = vcpu->kvm;
@@ -1180,9 +1339,9 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
return RESUME_GUEST;
}
/* passthrough of emulated MMIO case... */
pr_err("emulated MMIO passthrough?\n");
return -EINVAL;

/* passthrough of emulated MMIO case */
return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
}
if (memslot->flags & KVM_MEM_READONLY) {
if (writing) {
@@ -1220,6 +1379,8 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
return ret;
shift = kvmppc_radix_level_to_shift(level);
}
/* Align gfn to the start of the page */
gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;

/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */

@@ -1227,6 +1388,9 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
perm |= gpte.may_read ? 0UL : _PAGE_READ;
perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
/* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
pte = __pte(pte_val(pte) & ~perm);

/* What size pte can we insert? */
@@ -1264,13 +1428,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
return RESUME_GUEST;
}

long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
struct kvm_nested_guest *gp = vcpu->arch.nested;
long int ret;

mutex_lock(&gp->tlb_lock);
ret = __kvmhv_nested_page_fault(vcpu, gp);
ret = __kvmhv_nested_page_fault(run, vcpu, gp);
mutex_unlock(&gp->tlb_lock);
return ret;
}
@@ -107,7 +107,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/* Update the dirty bitmap of a memslot */
void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
unsigned long gfn, unsigned long psize)
{
unsigned long npages;
@@ -587,6 +587,7 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
case PVR_POWER8:
case PVR_POWER8E:
case PVR_POWER8NVL:
case PVR_POWER9:
vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
BOOK3S_HFLAG_NEW_TLBIE;
break;
@@ -1913,7 +1914,8 @@ static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
const struct kvm_memory_slot *new)
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
return;
}
@@ -1015,17 +1015,7 @@ static int xics_debug_show(struct seq_file *m, void *private)
return 0;
}

static int xics_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, xics_debug_show, inode->i_private);
}

static const struct file_operations xics_debug_fops = {
.open = xics_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
DEFINE_SHOW_ATTRIBUTE(xics_debug);

static void xics_debugfs_init(struct kvmppc_xics *xics)
{

@@ -1968,17 +1968,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
return 0;
}

static int xive_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, xive_debug_show, inode->i_private);
}

static const struct file_operations xive_debug_fops = {
.open = xive_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
DEFINE_SHOW_ATTRIBUTE(xive_debug);

static void xive_debugfs_init(struct kvmppc_xive *xive)
{
@@ -1833,7 +1833,8 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
const struct kvm_memory_slot *new)
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
}
@@ -757,10 +757,11 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
return 0;
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
/* The page will get remapped properly on its next fault */
kvm_unmap_hva(kvm, hva);
return 0;
}

/*****************************************/
@@ -331,10 +331,17 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
{
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
struct kvmppc_pte pte;
int r;
int r = -EINVAL;

vcpu->stat.st++;

if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->store_to_eaddr)
r = vcpu->kvm->arch.kvm_ops->store_to_eaddr(vcpu, eaddr, ptr,
size);

if ((!r) || (r == -EAGAIN))
return r;

r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
XLATE_WRITE, &pte);
if (r < 0)
@@ -367,10 +374,17 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
{
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
struct kvmppc_pte pte;
int rc;
int rc = -EINVAL;

vcpu->stat.ld++;

if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->load_from_eaddr)
rc = vcpu->kvm->arch.kvm_ops->load_from_eaddr(vcpu, eaddr, ptr,
size);

if ((!rc) || (rc == -EAGAIN))
return rc;

rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
XLATE_READ, &pte);
if (rc)
@@ -518,7 +532,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
@@ -543,8 +556,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
/* fallthrough */
r = 1;
break;
case KVM_CAP_SPAPR_TCE_VFIO:
r = !!cpu_has_feature(CPU_FTR_HVMODE);
break;
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL:
@@ -696,7 +712,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
kvmppc_core_commit_memory_region(kvm, mem, old, new);
kvmppc_core_commit_memory_region(kvm, mem, old, new, change);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -1191,6 +1207,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
KVMPPC_VMX_COPY_BYTE)
kvmppc_set_vmx_byte(vcpu, gpr);
break;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_MMIO_REG_NESTED_GPR:
if (kvmppc_need_byteswap(vcpu))
gpr = swab64(gpr);
kvm_vcpu_write_guest(vcpu, vcpu->arch.nested_io_gpr, &gpr,
sizeof(gpr));
break;
#endif
default:
BUG();
@@ -2084,8 +2108,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
int r;

@@ -2273,15 +2297,6 @@ long kvm_arch_vm_ioctl(struct file *filp,

break;
}
case KVM_ENABLE_CAP:
{
struct kvm_enable_cap cap;
r = -EFAULT;
if (copy_from_user(&cap, argp, sizeof(cap)))
goto out;
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
break;
}
#ifdef CONFIG_SPAPR_TCE_IOMMU
case KVM_CREATE_SPAPR_TCE_64: {
struct kvm_create_spapr_tce_64 create_tce_64;
@@ -636,6 +636,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
switch (TRAP(regs)) {
case 0x300:
case 0x380:
case 0xe00:
printk(KERN_ALERT "Unable to handle kernel paging request for "
"data at address 0x%08lx\n", regs->dar);
break;
@ -11,6 +11,9 @@
|
||||
* Jason J. Herne <jjherne@us.ibm.com>
|
||||
*/
|
||||
|
||||
#define KMSG_COMPONENT "kvm-s390"
|
||||
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/fs.h>
|
||||
@ -44,10 +47,6 @@
|
||||
#include "kvm-s390.h"
|
||||
#include "gaccess.h"
|
||||
|
||||
#define KMSG_COMPONENT "kvm-s390"
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
#include "trace-s390.h"
|
||||
@ -417,19 +416,30 @@ static void kvm_s390_cpu_feat_init(void)
|
||||
|
||||
int kvm_arch_init(void *opaque)
|
||||
{
|
||||
int rc;
|
||||
|
||||
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
|
||||
if (!kvm_s390_dbf)
|
||||
return -ENOMEM;
|
||||
|
||||
if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
|
||||
debug_unregister(kvm_s390_dbf);
|
||||
return -ENOMEM;
|
||||
rc = -ENOMEM;
|
||||
goto out_debug_unreg;
|
||||
}
|
||||
|
||||
kvm_s390_cpu_feat_init();
|
||||
|
||||
/* Register floating interrupt controller interface. */
|
||||
return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
|
||||
rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
|
||||
if (rc) {
|
||||
pr_err("Failed to register FLIC rc=%d\n", rc);
|
||||
goto out_debug_unreg;
|
||||
}
|
||||
return 0;
|
||||
|
||||
out_debug_unreg:
|
||||
debug_unregister(kvm_s390_dbf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
void kvm_arch_exit(void)
|
||||
@ -464,7 +474,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_S390_CSS_SUPPORT:
|
||||
case KVM_CAP_IOEVENTFD:
|
||||
case KVM_CAP_DEVICE_CTRL:
|
||||
case KVM_CAP_ENABLE_CAP_VM:
|
||||
case KVM_CAP_S390_IRQCHIP:
|
||||
case KVM_CAP_VM_ATTRIBUTES:
|
||||
case KVM_CAP_MP_STATE:
|
||||
@ -607,7 +616,7 @@ static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
|
||||
}
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
||||
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
||||
{
|
||||
int r;
|
||||
|
||||
@ -1933,14 +1942,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = kvm_s390_inject_vm(kvm, &s390int);
|
||||
break;
|
||||
}
|
||||
case KVM_ENABLE_CAP: {
|
||||
struct kvm_enable_cap cap;
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&cap, argp, sizeof(cap)))
|
||||
break;
|
||||
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
|
||||
break;
|
||||
}
|
||||
case KVM_CREATE_IRQCHIP: {
|
||||
struct kvm_irq_routing_entry routing;
|
||||
|
||||
|
@ -68,6 +68,7 @@ static struct pt_cap_desc {
|
||||
PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
|
||||
PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
|
||||
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
|
||||
PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)),
|
||||
PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
|
||||
PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
|
||||
PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
|
||||
@ -75,14 +76,21 @@ static struct pt_cap_desc {
|
||||
PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
|
||||
};
|
||||
|
||||
static u32 pt_cap_get(enum pt_capabilities cap)
|
||||
u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability)
|
||||
{
|
||||
struct pt_cap_desc *cd = &pt_caps[cap];
|
||||
u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
|
||||
struct pt_cap_desc *cd = &pt_caps[capability];
|
||||
u32 c = caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
|
||||
unsigned int shift = __ffs(cd->mask);
|
||||
|
||||
return (c & cd->mask) >> shift;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(intel_pt_validate_cap);
|
||||
|
||||
u32 intel_pt_validate_hw_cap(enum pt_capabilities cap)
|
||||
{
|
||||
return intel_pt_validate_cap(pt_pmu.caps, cap);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap);
|
||||
|
||||
static ssize_t pt_cap_show(struct device *cdev,
|
||||
struct device_attribute *attr,
|
||||
@ -92,7 +100,7 @@ static ssize_t pt_cap_show(struct device *cdev,
|
||||
container_of(attr, struct dev_ext_attribute, attr);
|
||||
enum pt_capabilities cap = (long)ea->var;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
|
||||
return snprintf(buf, PAGE_SIZE, "%x\n", intel_pt_validate_hw_cap(cap));
|
||||
}
|
||||
|
||||
static struct attribute_group pt_cap_group __ro_after_init = {
|
||||
@ -310,16 +318,16 @@ static bool pt_event_valid(struct perf_event *event)
|
||||
return false;
|
||||
|
||||
if (config & RTIT_CTL_CYC_PSB) {
|
||||
if (!pt_cap_get(PT_CAP_psb_cyc))
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_psb_cyc))
|
||||
return false;
|
||||
|
||||
allowed = pt_cap_get(PT_CAP_psb_periods);
|
||||
allowed = intel_pt_validate_hw_cap(PT_CAP_psb_periods);
|
||||
requested = (config & RTIT_CTL_PSB_FREQ) >>
|
||||
RTIT_CTL_PSB_FREQ_OFFSET;
|
||||
if (requested && (!(allowed & BIT(requested))))
|
||||
return false;
|
||||
|
||||
allowed = pt_cap_get(PT_CAP_cycle_thresholds);
|
||||
allowed = intel_pt_validate_hw_cap(PT_CAP_cycle_thresholds);
|
||||
requested = (config & RTIT_CTL_CYC_THRESH) >>
|
||||
RTIT_CTL_CYC_THRESH_OFFSET;
|
||||
if (requested && (!(allowed & BIT(requested))))
|
||||
@ -334,10 +342,10 @@ static bool pt_event_valid(struct perf_event *event)
|
||||
* Spec says that setting mtc period bits while mtc bit in
|
||||
* CPUID is 0 will #GP, so better safe than sorry.
|
||||
*/
|
||||
if (!pt_cap_get(PT_CAP_mtc))
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_mtc))
|
||||
return false;
|
||||
|
||||
allowed = pt_cap_get(PT_CAP_mtc_periods);
|
||||
allowed = intel_pt_validate_hw_cap(PT_CAP_mtc_periods);
|
||||
if (!allowed)
|
||||
return false;
|
||||
|
||||
@ -349,11 +357,11 @@ static bool pt_event_valid(struct perf_event *event)
|
||||
}
|
||||
|
||||
if (config & RTIT_CTL_PWR_EVT_EN &&
|
||||
!pt_cap_get(PT_CAP_power_event_trace))
|
||||
!intel_pt_validate_hw_cap(PT_CAP_power_event_trace))
|
||||
return false;
|
||||
|
||||
if (config & RTIT_CTL_PTW) {
|
||||
if (!pt_cap_get(PT_CAP_ptwrite))
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite))
|
||||
return false;
|
||||
|
||||
/* FUPonPTW without PTW doesn't make sense */
|
||||
@ -598,7 +606,7 @@ static struct topa *topa_alloc(int cpu, gfp_t gfp)
|
||||
* In case of singe-entry ToPA, always put the self-referencing END
|
||||
* link as the 2nd entry in the table
|
||||
*/
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
|
||||
TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
|
||||
TOPA_ENTRY(topa, 1)->end = 1;
|
||||
}
|
||||
@ -638,7 +646,7 @@ static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
|
||||
topa->offset = last->offset + last->size;
|
||||
buf->last = topa;
|
||||
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries))
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
|
||||
return;
|
||||
|
||||
BUG_ON(last->last != TENTS_PER_PAGE - 1);
|
||||
@ -654,7 +662,7 @@ static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
|
||||
static bool topa_table_full(struct topa *topa)
|
||||
{
|
||||
/* single-entry ToPA is a special case */
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries))
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
|
||||
return !!topa->last;
|
||||
|
||||
return topa->last == TENTS_PER_PAGE - 1;
|
||||
@ -690,7 +698,8 @@ static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
|
||||
|
||||
TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
|
||||
TOPA_ENTRY(topa, -1)->size = order;
|
||||
if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
|
||||
if (!buf->snapshot &&
|
||||
!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
|
||||
TOPA_ENTRY(topa, -1)->intr = 1;
|
||||
TOPA_ENTRY(topa, -1)->stop = 1;
|
||||
}
|
||||
@ -725,7 +734,7 @@ static void pt_topa_dump(struct pt_buffer *buf)
|
||||
topa->table[i].intr ? 'I' : ' ',
|
||||
topa->table[i].stop ? 'S' : ' ',
|
||||
*(u64 *)&topa->table[i]);
|
||||
if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
|
||||
if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
|
||||
topa->table[i].stop) ||
|
||||
topa->table[i].end)
|
||||
break;
|
||||
@ -828,7 +837,7 @@ static void pt_handle_status(struct pt *pt)
|
||||
* means we are already losing data; need to let the decoder
|
||||
* know.
|
||||
*/
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
|
||||
buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
|
||||
perf_aux_output_flag(&pt->handle,
|
||||
PERF_AUX_FLAG_TRUNCATED);
|
||||
@ -840,7 +849,8 @@ static void pt_handle_status(struct pt *pt)
|
||||
* Also on single-entry ToPA implementations, interrupt will come
|
||||
* before the output reaches its output region's boundary.
|
||||
*/
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
|
||||
!buf->snapshot &&
|
||||
pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
|
||||
void *head = pt_buffer_region(buf);
|
||||
|
||||
@ -931,7 +941,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
|
||||
|
||||
|
||||
/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries))
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
|
||||
return 0;
|
||||
|
||||
/* clear STOP and INT from current entry */
|
||||
@ -1082,7 +1092,7 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
|
||||
pt_buffer_setup_topa_index(buf);
|
||||
|
||||
/* link last table to the first one, unless we're double buffering */
|
||||
if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
|
||||
if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
|
||||
TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
|
||||
TOPA_ENTRY(buf->last, -1)->end = 1;
|
||||
}
|
||||
@ -1153,7 +1163,7 @@ static int pt_addr_filters_init(struct perf_event *event)
|
||||
struct pt_filters *filters;
|
||||
int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
|
||||
|
||||
if (!pt_cap_get(PT_CAP_num_address_ranges))
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
|
||||
return 0;
|
||||
|
||||
filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
|
||||
@ -1202,7 +1212,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (++range > pt_cap_get(PT_CAP_num_address_ranges))
|
||||
if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
@ -1507,12 +1517,12 @@ static __init int pt_init(void)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!pt_cap_get(PT_CAP_topa_output)) {
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_output)) {
|
||||
pr_warn("ToPA output is not supported on this CPU\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries))
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
|
||||
pt_pmu.pmu.capabilities =
|
||||
PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
|
||||
|
||||
@ -1530,7 +1540,7 @@ static __init int pt_init(void)
|
||||
pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync;
|
||||
pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
|
||||
pt_pmu.pmu.nr_addr_filters =
|
||||
pt_cap_get(PT_CAP_num_address_ranges);
|
||||
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges);
|
||||
|
||||
ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
|
||||
|
||||
|
@ -19,43 +19,6 @@
|
||||
#ifndef __INTEL_PT_H__
|
||||
#define __INTEL_PT_H__
|
||||
|
||||
/*
|
||||
* PT MSR bit definitions
|
||||
*/
|
||||
#define RTIT_CTL_TRACEEN BIT(0)
|
||||
#define RTIT_CTL_CYCLEACC BIT(1)
|
||||
#define RTIT_CTL_OS BIT(2)
|
||||
#define RTIT_CTL_USR BIT(3)
|
||||
#define RTIT_CTL_PWR_EVT_EN BIT(4)
|
||||
#define RTIT_CTL_FUP_ON_PTW BIT(5)
|
||||
#define RTIT_CTL_CR3EN BIT(7)
|
||||
#define RTIT_CTL_TOPA BIT(8)
|
||||
#define RTIT_CTL_MTC_EN BIT(9)
|
||||
#define RTIT_CTL_TSC_EN BIT(10)
|
||||
#define RTIT_CTL_DISRETC BIT(11)
|
||||
#define RTIT_CTL_PTW_EN BIT(12)
|
||||
#define RTIT_CTL_BRANCH_EN BIT(13)
|
||||
#define RTIT_CTL_MTC_RANGE_OFFSET 14
|
||||
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
|
||||
#define RTIT_CTL_CYC_THRESH_OFFSET 19
|
||||
#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
|
||||
#define RTIT_CTL_PSB_FREQ_OFFSET 24
|
||||
#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
|
||||
#define RTIT_CTL_ADDR0_OFFSET 32
|
||||
#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET)
|
||||
#define RTIT_CTL_ADDR1_OFFSET 36
|
||||
#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET)
|
||||
#define RTIT_CTL_ADDR2_OFFSET 40
|
||||
#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET)
|
||||
#define RTIT_CTL_ADDR3_OFFSET 44
|
||||
#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET)
|
||||
#define RTIT_STATUS_FILTEREN BIT(0)
|
||||
#define RTIT_STATUS_CONTEXTEN BIT(1)
|
||||
#define RTIT_STATUS_TRIGGEREN BIT(2)
|
||||
#define RTIT_STATUS_BUFFOVF BIT(3)
|
||||
#define RTIT_STATUS_ERROR BIT(4)
|
||||
#define RTIT_STATUS_STOPPED BIT(5)
|
||||
|
||||
/*
|
||||
* Single-entry ToPA: when this close to region boundary, switch
|
||||
* buffers to avoid losing data.
|
||||
@ -82,30 +45,9 @@ struct topa_entry {
|
||||
u64 rsvd4 : 16;
|
||||
};
|
||||
|
||||
#define PT_CPUID_LEAVES 2
|
||||
#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
|
||||
|
||||
/* TSC to Core Crystal Clock Ratio */
|
||||
#define CPUID_TSC_LEAF 0x15
|
||||
|
||||
enum pt_capabilities {
|
||||
PT_CAP_max_subleaf = 0,
|
||||
PT_CAP_cr3_filtering,
|
||||
PT_CAP_psb_cyc,
|
||||
PT_CAP_ip_filtering,
|
||||
PT_CAP_mtc,
|
||||
PT_CAP_ptwrite,
|
||||
PT_CAP_power_event_trace,
|
||||
PT_CAP_topa_output,
|
||||
PT_CAP_topa_multiple_entries,
|
||||
PT_CAP_single_range_output,
|
||||
PT_CAP_payloads_lip,
|
||||
PT_CAP_num_address_ranges,
|
||||
PT_CAP_mtc_periods,
|
||||
PT_CAP_cycle_thresholds,
|
||||
PT_CAP_psb_periods,
|
||||
};
|
||||
|
||||
struct pt_pmu {
|
||||
struct pmu pmu;
|
||||
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
|
||||
|
@ -7,6 +7,7 @@
|
||||
*
|
||||
* Author : Lan Tianyu <Tianyu.Lan@microsoft.com>
|
||||
*/
|
||||
#define pr_fmt(fmt) "Hyper-V: " fmt
|
||||
|
||||
|
||||
#include <linux/types.h>
|
||||
@ -54,3 +55,82 @@ fault:
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
|
||||
|
||||
int hyperv_fill_flush_guest_mapping_list(
|
||||
struct hv_guest_mapping_flush_list *flush,
|
||||
u64 start_gfn, u64 pages)
|
||||
{
|
||||
u64 cur = start_gfn;
|
||||
u64 additional_pages;
|
||||
int gpa_n = 0;
|
||||
|
||||
do {
|
||||
/*
|
||||
* If flush requests exceed max flush count, go back to
|
||||
* flush tlbs without range.
|
||||
*/
|
||||
if (gpa_n >= HV_MAX_FLUSH_REP_COUNT)
|
||||
return -ENOSPC;
|
||||
|
||||
additional_pages = min_t(u64, pages, HV_MAX_FLUSH_PAGES) - 1;
|
||||
|
||||
flush->gpa_list[gpa_n].page.additional_pages = additional_pages;
|
||||
flush->gpa_list[gpa_n].page.largepage = false;
|
||||
flush->gpa_list[gpa_n].page.basepfn = cur;
|
||||
|
||||
pages -= additional_pages + 1;
|
||||
cur += additional_pages + 1;
|
||||
gpa_n++;
|
||||
} while (pages > 0);
|
||||
|
||||
return gpa_n;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hyperv_fill_flush_guest_mapping_list);
|
||||
|
||||
int hyperv_flush_guest_mapping_range(u64 as,
|
||||
hyperv_fill_flush_list_func fill_flush_list_func, void *data)
|
||||
{
|
||||
struct hv_guest_mapping_flush_list **flush_pcpu;
|
||||
struct hv_guest_mapping_flush_list *flush;
|
||||
u64 status = 0;
|
||||
unsigned long flags;
|
||||
int ret = -ENOTSUPP;
|
||||
int gpa_n = 0;
|
||||
|
||||
if (!hv_hypercall_pg || !fill_flush_list_func)
|
||||
goto fault;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
flush_pcpu = (struct hv_guest_mapping_flush_list **)
|
||||
this_cpu_ptr(hyperv_pcpu_input_arg);
|
||||
|
||||
flush = *flush_pcpu;
|
||||
if (unlikely(!flush)) {
|
||||
local_irq_restore(flags);
|
||||
goto fault;
|
||||
}
|
||||
|
||||
flush->address_space = as;
|
||||
flush->flags = 0;
|
||||
|
||||
gpa_n = fill_flush_list_func(flush, data);
|
||||
if (gpa_n < 0) {
|
||||
local_irq_restore(flags);
|
||||
goto fault;
|
||||
}
|
||||
|
||||
status = hv_do_rep_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST,
|
||||
gpa_n, 0, flush, NULL);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (!(status & HV_HYPERCALL_RESULT_MASK))
|
||||
ret = 0;
|
||||
else
|
||||
ret = status;
|
||||
fault:
|
||||
trace_hyperv_nested_flush_guest_mapping_range(as, ret);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping_range);
|
||||
|
@ -281,6 +281,7 @@
|
||||
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
|
||||
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
|
||||
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
|
||||
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
|
||||
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
|
||||
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
|
||||
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
|
||||
|
@ -10,6 +10,7 @@
|
||||
#define _ASM_X86_HYPERV_TLFS_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
/*
|
||||
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
|
||||
@ -30,158 +31,150 @@
|
||||
/*
|
||||
* Feature identification. EAX indicates which features are available
|
||||
* to the partition based upon the current partition privileges.
|
||||
* These are HYPERV_CPUID_FEATURES.EAX bits.
|
||||
*/
|
||||
|
||||
/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
|
||||
#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0)
|
||||
#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0)
|
||||
/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
|
||||
#define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
|
||||
/* Partition reference TSC MSR is available */
|
||||
#define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9)
|
||||
/* Partition Guest IDLE MSR is available */
|
||||
#define HV_X64_MSR_GUEST_IDLE_AVAILABLE (1 << 10)
|
||||
|
||||
/* A partition's reference time stamp counter (TSC) page */
|
||||
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
|
||||
|
||||
/*
|
||||
* There is a single feature flag that signifies if the partition has access
|
||||
* to MSRs with local APIC and TSC frequencies.
|
||||
*/
|
||||
#define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11)
|
||||
|
||||
/* AccessReenlightenmentControls privilege */
|
||||
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
|
||||
|
||||
#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
|
||||
/*
|
||||
* Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
|
||||
* and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
|
||||
*/
|
||||
#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2)
|
||||
#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2)
|
||||
/*
|
||||
* Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
|
||||
* HV_X64_MSR_STIMER3_COUNT) available
|
||||
*/
|
||||
#define HV_MSR_SYNTIMER_AVAILABLE (1 << 3)
|
||||
#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
|
||||
/*
|
||||
* APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
|
||||
* are available
|
||||
*/
|
||||
#define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4)
|
||||
#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4)
|
||||
/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
|
||||
#define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5)
|
||||
#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5)
|
||||
/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
|
||||
#define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6)
|
||||
#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6)
|
||||
/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
|
||||
#define HV_X64_MSR_RESET_AVAILABLE (1 << 7)
|
||||
/*
|
||||
* Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
|
||||
* HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
|
||||
* HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
|
||||
*/
|
||||
#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8)
|
||||
|
||||
/* Frequency MSRs available */
|
||||
#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE (1 << 8)
|
||||
|
||||
/* Crash MSR available */
|
||||
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
|
||||
|
||||
/* stimer Direct Mode is available */
|
||||
#define HV_STIMER_DIRECT_MODE_AVAILABLE (1 << 19)
|
||||
#define HV_X64_MSR_RESET_AVAILABLE BIT(7)
|
||||
/*
|
||||
* Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
|
||||
* HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
|
||||
* HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
|
||||
*/
|
||||
#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8)
|
||||
/* Partition reference TSC MSR is available */
|
||||
#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
|
||||
/* Partition Guest IDLE MSR is available */
|
||||
#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10)
|
||||
/*
|
||||
* There is a single feature flag that signifies if the partition has access
|
||||
* to MSRs with local APIC and TSC frequencies.
|
||||
*/
|
||||
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
|
||||
/* AccessReenlightenmentControls privilege */
|
||||
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
|
||||
|
||||
/*
|
||||
* Feature identification: EBX indicates which flags were specified at
|
||||
* partition creation. The format is the same as the partition creation
|
||||
* flag structure defined in section Partition Creation Flags.
|
||||
* Feature identification: indicates which flags were specified at partition
|
||||
* creation. The format is the same as the partition creation flag structure
|
||||
* defined in section Partition Creation Flags.
|
||||
* These are HYPERV_CPUID_FEATURES.EBX bits.
|
||||
*/
|
||||
#define HV_X64_CREATE_PARTITIONS (1 << 0)
|
||||
#define HV_X64_ACCESS_PARTITION_ID (1 << 1)
|
||||
#define HV_X64_ACCESS_MEMORY_POOL (1 << 2)
|
||||
#define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3)
|
||||
#define HV_X64_POST_MESSAGES (1 << 4)
|
||||
#define HV_X64_SIGNAL_EVENTS (1 << 5)
|
||||
#define HV_X64_CREATE_PORT (1 << 6)
|
||||
#define HV_X64_CONNECT_PORT (1 << 7)
|
||||
#define HV_X64_ACCESS_STATS (1 << 8)
|
||||
#define HV_X64_DEBUGGING (1 << 11)
|
||||
#define HV_X64_CPU_POWER_MANAGEMENT (1 << 12)
|
||||
#define HV_X64_CONFIGURE_PROFILER (1 << 13)
|
||||
#define HV_X64_CREATE_PARTITIONS BIT(0)
|
||||
#define HV_X64_ACCESS_PARTITION_ID BIT(1)
|
||||
#define HV_X64_ACCESS_MEMORY_POOL BIT(2)
|
||||
#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3)
|
||||
#define HV_X64_POST_MESSAGES BIT(4)
|
||||
#define HV_X64_SIGNAL_EVENTS BIT(5)
|
||||
#define HV_X64_CREATE_PORT BIT(6)
|
||||
#define HV_X64_CONNECT_PORT BIT(7)
|
||||
#define HV_X64_ACCESS_STATS BIT(8)
|
||||
#define HV_X64_DEBUGGING BIT(11)
|
||||
#define HV_X64_CPU_POWER_MANAGEMENT BIT(12)
|
||||
|
||||
/*
|
||||
* Feature identification. EDX indicates which miscellaneous features
|
||||
* are available to the partition.
|
||||
* These are HYPERV_CPUID_FEATURES.EDX bits.
|
||||
*/
|
||||
/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
|
||||
#define HV_X64_MWAIT_AVAILABLE (1 << 0)
|
||||
#define HV_X64_MWAIT_AVAILABLE BIT(0)
|
||||
/* Guest debugging support is available */
|
||||
#define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1)
|
||||
#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
|
||||
/* Performance Monitor support is available*/
|
||||
#define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2)
|
||||
#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
|
||||
/* Support for physical CPU dynamic partitioning events is available*/
|
||||
#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3)
|
||||
#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
|
||||
/*
|
||||
* Support for passing hypercall input parameter block via XMM
|
||||
* registers is available
|
||||
*/
|
||||
#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4)
|
||||
#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4)
|
||||
/* Support for a virtual guest idle state is available */
|
||||
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5)
|
||||
/* Guest crash data handler available */
|
||||
#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
|
||||
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
|
||||
/* Frequency MSRs available */
|
||||
#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
|
||||
/* Crash MSR available */
|
||||
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
|
||||
/* stimer Direct Mode is available */
|
||||
#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
|
||||
|
||||
/*
|
||||
* Implementation recommendations. Indicates which behaviors the hypervisor
|
||||
* recommends the OS implement for optimal performance.
|
||||
* These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits.
|
||||
*/
|
||||
/*
|
||||
* Recommend using hypercall for address space switches rather
|
||||
* than MOV to CR3 instruction
|
||||
*/
|
||||
#define HV_X64_AS_SWITCH_RECOMMENDED (1 << 0)
|
||||
/*
|
||||
* Recommend using hypercall for address space switches rather
|
||||
* than MOV to CR3 instruction
|
||||
*/
|
||||
#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0)
|
||||
/* Recommend using hypercall for local TLB flushes rather
|
||||
* than INVLPG or MOV to CR3 instructions */
|
||||
#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1)
|
||||
#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1)
|
||||
/*
|
||||
* Recommend using hypercall for remote TLB flushes rather
|
||||
* than inter-processor interrupts
|
||||
*/
|
||||
#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2)
|
||||
#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2)
|
||||
/*
|
||||
* Recommend using MSRs for accessing APIC registers
|
||||
* EOI, ICR and TPR rather than their memory-mapped counterparts
|
||||
*/
|
||||
#define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3)
|
||||
#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3)
|
||||
/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
|
||||
#define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4)
|
||||
#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4)
|
||||
/*
|
||||
* Recommend using relaxed timing for this partition. If used,
|
||||
* the VM should disable any watchdog timeouts that rely on the
|
||||
* timely delivery of external interrupts
|
||||
*/
|
||||
#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
|
||||
#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5)
|
||||
|
||||
/*
|
||||
* Recommend not using Auto End-Of-Interrupt feature
|
||||
*/
|
||||
#define HV_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
|
||||
#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
|
||||
|
||||
/*
|
||||
* Recommend using cluster IPI hypercalls.
|
||||
*/
|
||||
#define HV_X64_CLUSTER_IPI_RECOMMENDED (1 << 10)
|
||||
#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10)
|
||||
|
||||
/* Recommend using the newer ExProcessorMasks interface */
|
||||
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
|
||||
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
|
||||
|
||||
/* Recommend using enlightened VMCS */
|
||||
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14)
|
||||
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
|
||||
|
||||
/*
|
||||
* Crash notification flags.
|
||||
*/
|
||||
#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
|
||||
#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
|
||||
/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
|
||||
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
|
||||
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
|
||||
|
||||
/* Hyper-V specific model specific registers (MSRs) */
|
||||
|
||||
/* MSR used to identify the guest OS. */
|
||||
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
|
||||
@ -201,6 +194,9 @@
|
||||
/* MSR used to read the per-partition time reference counter */
|
||||
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
|
||||
|
||||
/* A partition's reference time stamp counter (TSC) page */
|
||||
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
|
||||
|
||||
/* MSR used to retrieve the TSC frequency */
|
||||
#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
|
||||
|
||||
@ -258,9 +254,11 @@
|
||||
#define HV_X64_MSR_CRASH_P3 0x40000103
|
||||
#define HV_X64_MSR_CRASH_P4 0x40000104
|
||||
#define HV_X64_MSR_CRASH_CTL 0x40000105
|
||||
#define HV_X64_MSR_CRASH_CTL_NOTIFY (1ULL << 63)
|
||||
#define HV_X64_MSR_CRASH_PARAMS \
|
||||
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
|
||||
|
||||
/* TSC emulation after migration */
|
||||
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
|
||||
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
|
||||
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
|
||||
|
||||
/*
|
||||
* Declare the MSR used to setup pages used to communicate with the hypervisor.
|
||||
@ -271,7 +269,7 @@ union hv_x64_msr_hypercall_contents {
|
||||
u64 enable:1;
|
||||
u64 reserved:11;
|
||||
u64 guest_physical_address:52;
|
||||
};
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -283,7 +281,7 @@ struct ms_hyperv_tsc_page {
|
||||
volatile u64 tsc_scale;
|
||||
volatile s64 tsc_offset;
|
||||
u64 reserved2[509];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* The guest OS needs to register the guest ID with the hypervisor.
|
||||
@ -311,39 +309,37 @@ struct ms_hyperv_tsc_page {
|
||||
|
||||
#define HV_LINUX_VENDOR_ID 0x8100
|
||||
|
||||
/* TSC emulation after migration */
|
||||
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
|
||||
|
||||
/* Nested features (CPUID 0x4000000A) EAX */
|
||||
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
|
||||
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
|
||||
|
||||
struct hv_reenlightenment_control {
|
||||
__u64 vector:8;
|
||||
__u64 reserved1:8;
|
||||
__u64 enabled:1;
|
||||
__u64 reserved2:15;
|
||||
__u64 target_vp:32;
|
||||
};
|
||||
|
||||
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
|
||||
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
|
||||
} __packed;
|
||||
|
||||
struct hv_tsc_emulation_control {
|
||||
__u64 enabled:1;
|
||||
__u64 reserved:63;
|
||||
};
|
||||
} __packed;
|
||||
|
||||
struct hv_tsc_emulation_status {
|
||||
__u64 inprogress:1;
|
||||
__u64 reserved:63;
|
||||
};
|
||||
} __packed;
|
||||
|
||||
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
|
||||
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
|
||||
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
|
||||
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
|
||||
|
||||
/*
|
||||
* Crash notification (HV_X64_MSR_CRASH_CTL) flags.
|
||||
*/
|
||||
#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
|
||||
#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
|
||||
#define HV_X64_MSR_CRASH_PARAMS \
|
||||
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
|
||||
|
||||
#define HV_IPI_LOW_VECTOR 0x10
|
||||
#define HV_IPI_HIGH_VECTOR 0xff
|
||||
|
||||
@ -358,6 +354,7 @@ struct hv_tsc_emulation_status {
|
||||
#define HVCALL_POST_MESSAGE 0x005c
|
||||
#define HVCALL_SIGNAL_EVENT 0x005d
|
||||
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
|
||||
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
|
||||
|
||||
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
|
||||
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
|
||||
@ -409,7 +406,7 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
|
||||
__u32 res1;
|
||||
__u64 tsc_scale;
|
||||
__s64 tsc_offset;
|
||||
} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
|
||||
} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
|
||||
|
||||
/* Define the number of synthetic interrupt sources. */
|
||||
#define HV_SYNIC_SINT_COUNT (16)
|
||||
@ -466,7 +463,7 @@ union hv_message_flags {
|
||||
struct {
|
||||
__u8 msg_pending:1;
|
||||
__u8 reserved:7;
|
||||
};
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/* Define port identifier type. */
|
||||
@ -475,7 +472,7 @@ union hv_port_id {
|
||||
struct {
|
||||
__u32 id:24;
|
||||
__u32 reserved:8;
|
||||
} u;
|
||||
} __packed u;
|
||||
};
|
||||
|
||||
/* Define synthetic interrupt controller message header. */
|
||||
@ -488,7 +485,7 @@ struct hv_message_header {
|
||||
__u64 sender;
|
||||
union hv_port_id port;
|
||||
};
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* Define synthetic interrupt controller message format. */
|
||||
struct hv_message {
|
||||
@ -496,12 +493,12 @@ struct hv_message {
|
||||
union {
|
||||
__u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
|
||||
} u;
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* Define the synthetic interrupt message page layout. */
|
||||
struct hv_message_page {
|
||||
struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* Define timer message payload structure. */
|
||||
struct hv_timer_message_payload {
|
||||
@ -509,7 +506,7 @@ struct hv_timer_message_payload {
|
||||
__u32 reserved;
|
||||
__u64 expiration_time; /* When the timer expired */
|
||||
__u64 delivery_time; /* When the message was delivered */
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* Define virtual processor assist page structure. */
|
||||
struct hv_vp_assist_page {
|
||||
@ -518,8 +515,9 @@ struct hv_vp_assist_page {
|
||||
__u64 vtl_control[2];
|
||||
__u64 nested_enlightenments_control[2];
|
||||
__u32 enlighten_vmentry;
|
||||
__u32 padding;
|
||||
__u64 current_nested_vmcs;
|
||||
};
|
||||
} __packed;
|
||||
|
||||
struct hv_enlightened_vmcs {
|
||||
u32 revision_id;
|
||||
@ -533,6 +531,8 @@ struct hv_enlightened_vmcs {
|
||||
u16 host_gs_selector;
|
||||
u16 host_tr_selector;
|
||||
|
||||
u16 padding16_1;
|
||||
|
||||
u64 host_ia32_pat;
|
||||
u64 host_ia32_efer;
|
||||
|
||||
@ -651,7 +651,7 @@ struct hv_enlightened_vmcs {
|
||||
u64 ept_pointer;
|
||||
|
||||
u16 virtual_processor_id;
|
||||
u16 padding16[3];
|
||||
u16 padding16_2[3];
|
||||
|
||||
u64 padding64_2[5];
|
||||
u64 guest_physical_address;
|
||||
@ -693,7 +693,7 @@ struct hv_enlightened_vmcs {
|
||||
u32 nested_flush_hypercall:1;
|
||||
u32 msr_bitmap:1;
|
||||
u32 reserved:30;
|
||||
} hv_enlightenments_control;
|
||||
} __packed hv_enlightenments_control;
|
||||
u32 hv_vp_id;
|
||||
|
||||
u64 hv_vm_id;
|
||||
@ -703,7 +703,7 @@ struct hv_enlightened_vmcs {
|
||||
u64 padding64_5[7];
|
||||
u64 xss_exit_bitmap;
|
||||
u64 padding64_6[7];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
|
||||
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
|
||||
@ -725,36 +725,129 @@ struct hv_enlightened_vmcs {
|
||||
|
||||
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
|
||||
|
||||
#define HV_STIMER_ENABLE (1ULL << 0)
|
||||
#define HV_STIMER_PERIODIC (1ULL << 1)
|
||||
#define HV_STIMER_LAZY (1ULL << 2)
|
||||
#define HV_STIMER_AUTOENABLE (1ULL << 3)
|
||||
#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F)
|
||||
/* Define synthetic interrupt controller flag constants. */
|
||||
#define HV_EVENT_FLAGS_COUNT (256 * 8)
|
||||
#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
|
||||
|
||||
/*
|
||||
* Synthetic timer configuration.
|
||||
*/
|
||||
union hv_stimer_config {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 enable:1;
|
||||
u64 periodic:1;
|
||||
u64 lazy:1;
|
||||
u64 auto_enable:1;
|
||||
u64 apic_vector:8;
|
||||
u64 direct_mode:1;
|
||||
u64 reserved_z0:3;
|
||||
u64 sintx:4;
|
||||
u64 reserved_z1:44;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
|
||||
/* Define the synthetic interrupt controller event flags format. */
|
||||
union hv_synic_event_flags {
|
||||
unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
|
||||
};
|
||||
|
||||
/* Define SynIC control register. */
|
||||
union hv_synic_scontrol {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 enable:1;
|
||||
u64 reserved:63;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/* Define synthetic interrupt source. */
|
||||
union hv_synic_sint {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 vector:8;
|
||||
u64 reserved1:8;
|
||||
u64 masked:1;
|
||||
u64 auto_eoi:1;
|
||||
u64 reserved2:46;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/* Define the format of the SIMP register */
|
||||
union hv_synic_simp {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 simp_enabled:1;
|
||||
u64 preserved:11;
|
||||
u64 base_simp_gpa:52;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/* Define the format of the SIEFP register */
|
||||
union hv_synic_siefp {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 siefp_enabled:1;
|
||||
u64 preserved:11;
|
||||
u64 base_siefp_gpa:52;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct hv_vpset {
|
||||
u64 format;
|
||||
u64 valid_bank_mask;
|
||||
u64 bank_contents[];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* HvCallSendSyntheticClusterIpi hypercall */
|
||||
struct hv_send_ipi {
|
||||
u32 vector;
|
||||
u32 reserved;
|
||||
u64 cpu_mask;
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* HvCallSendSyntheticClusterIpiEx hypercall */
|
||||
struct hv_send_ipi_ex {
|
||||
u32 vector;
|
||||
u32 reserved;
|
||||
struct hv_vpset vp_set;
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* HvFlushGuestPhysicalAddressSpace hypercalls */
|
||||
struct hv_guest_mapping_flush {
|
||||
u64 address_space;
|
||||
u64 flags;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
|
||||
* by the bitwidth of "additional_pages" in union hv_gpa_page_range.
|
||||
*/
|
||||
#define HV_MAX_FLUSH_PAGES (2048)
|
||||
|
||||
/* HvFlushGuestPhysicalAddressList hypercall */
|
||||
union hv_gpa_page_range {
|
||||
u64 address_space;
|
||||
struct {
|
||||
u64 additional_pages:11;
|
||||
u64 largepage:1;
|
||||
u64 basepfn:52;
|
||||
} page;
|
||||
};
|
||||
|
||||
/*
|
||||
* All input flush parameters should be in single page. The max flush
|
||||
* count is equal with how many entries of union hv_gpa_page_range can
|
||||
* be populated into the input parameter page.
|
||||
*/
|
||||
#define HV_MAX_FLUSH_REP_COUNT (PAGE_SIZE - 2 * sizeof(u64) / \
|
||||
sizeof(union hv_gpa_page_range))
|
||||
|
||||
struct hv_guest_mapping_flush_list {
|
||||
u64 address_space;
|
||||
u64 flags;
|
||||
union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
|
||||
};
|
||||
|
||||
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
|
||||
@ -763,7 +856,7 @@ struct hv_tlb_flush {
|
||||
u64 flags;
|
||||
u64 processor_mask;
|
||||
u64 gva_list[];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
|
||||
struct hv_tlb_flush_ex {
|
||||
@ -771,6 +864,6 @@ struct hv_tlb_flush_ex {
|
||||
u64 flags;
|
||||
struct hv_vpset hv_vp_set;
|
||||
u64 gva_list[];
|
||||
};
|
||||
} __packed;
|
||||
|
||||
#endif
|
||||
|
@ -2,10 +2,36 @@
#ifndef _ASM_X86_INTEL_PT_H
#define _ASM_X86_INTEL_PT_H

#define PT_CPUID_LEAVES 2
#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */

enum pt_capabilities {
	PT_CAP_max_subleaf = 0,
	PT_CAP_cr3_filtering,
	PT_CAP_psb_cyc,
	PT_CAP_ip_filtering,
	PT_CAP_mtc,
	PT_CAP_ptwrite,
	PT_CAP_power_event_trace,
	PT_CAP_topa_output,
	PT_CAP_topa_multiple_entries,
	PT_CAP_single_range_output,
	PT_CAP_output_subsys,
	PT_CAP_payloads_lip,
	PT_CAP_num_address_ranges,
	PT_CAP_mtc_periods,
	PT_CAP_cycle_thresholds,
	PT_CAP_psb_periods,
};

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
void cpu_emergency_stop_pt(void);
extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap);
extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap);
#else
static inline void cpu_emergency_stop_pt(void) {}
static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; }
static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; }
#endif

#endif /* _ASM_X86_INTEL_PT_H */

@ -439,6 +439,11 @@ struct kvm_mmu {
|
||||
u64 pdptrs[4]; /* pae */
|
||||
};
|
||||
|
||||
struct kvm_tlb_range {
|
||||
u64 start_gfn;
|
||||
u64 pages;
|
||||
};
|
||||
|
||||
enum pmc_type {
|
||||
KVM_PMC_GP = 0,
|
||||
KVM_PMC_FIXED,
|
||||
@ -497,7 +502,7 @@ struct kvm_mtrr {
|
||||
struct kvm_vcpu_hv_stimer {
|
||||
struct hrtimer timer;
|
||||
int index;
|
||||
u64 config;
|
||||
union hv_stimer_config config;
|
||||
u64 count;
|
||||
u64 exp_time;
|
||||
struct hv_message msg;
|
||||
@ -601,17 +606,16 @@ struct kvm_vcpu_arch {
|
||||
|
||||
/*
|
||||
* QEMU userspace and the guest each have their own FPU state.
|
||||
* In vcpu_run, we switch between the user and guest FPU contexts.
|
||||
* While running a VCPU, the VCPU thread will have the guest FPU
|
||||
* context.
|
||||
* In vcpu_run, we switch between the user, maintained in the
|
||||
* task_struct struct, and guest FPU contexts. While running a VCPU,
|
||||
* the VCPU thread will have the guest FPU context.
|
||||
*
|
||||
* Note that while the PKRU state lives inside the fpu registers,
|
||||
* it is switched out separately at VMENTER and VMEXIT time. The
|
||||
* "guest_fpu" state here contains the guest FPU context, with the
|
||||
* host PRKU bits.
|
||||
*/
|
||||
struct fpu user_fpu;
|
||||
struct fpu guest_fpu;
|
||||
struct fpu *guest_fpu;
|
||||
|
||||
u64 xcr0;
|
||||
u64 guest_supported_xcr0;
|
||||
@ -1042,6 +1046,8 @@ struct kvm_x86_ops {
|
||||
|
||||
void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
|
||||
int (*tlb_remote_flush)(struct kvm *kvm);
|
||||
int (*tlb_remote_flush_with_range)(struct kvm *kvm,
|
||||
struct kvm_tlb_range *range);
|
||||
|
||||
/*
|
||||
* Flush any TLB entries associated with the given GVA.
|
||||
@ -1106,6 +1112,7 @@ struct kvm_x86_ops {
|
||||
bool (*mpx_supported)(void);
|
||||
bool (*xsaves_supported)(void);
|
||||
bool (*umip_emulated)(void);
|
||||
bool (*pt_supported)(void);
|
||||
|
||||
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
|
||||
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
|
||||
@ -1186,6 +1193,7 @@ struct kvm_x86_ops {
|
||||
|
||||
int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
|
||||
uint16_t *vmcs_version);
|
||||
uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
|
||||
};
|
||||
|
||||
struct kvm_arch_async_pf {
|
||||
@ -1196,6 +1204,7 @@ struct kvm_arch_async_pf {
|
||||
};
|
||||
|
||||
extern struct kvm_x86_ops *kvm_x86_ops;
|
||||
extern struct kmem_cache *x86_fpu_cache;
|
||||
|
||||
#define __KVM_HAVE_ARCH_VM_ALLOC
|
||||
static inline struct kvm *kvm_arch_alloc_vm(void)
|
||||
@ -1492,7 +1501,7 @@ asmlinkage void kvm_spurious_fault(void);
|
||||
"cmpb $0, kvm_rebooting \n\t" \
|
||||
"jne 668b \n\t" \
|
||||
__ASM_SIZE(push) " $666b \n\t" \
|
||||
"call kvm_spurious_fault \n\t" \
|
||||
"jmp kvm_spurious_fault \n\t" \
|
||||
".popsection \n\t" \
|
||||
_ASM_EXTABLE(666b, 667b)
|
||||
|
||||
@ -1503,7 +1512,7 @@ asmlinkage void kvm_spurious_fault(void);
|
||||
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
|
||||
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
|
||||
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
|
||||
|
@ -22,6 +22,11 @@ struct ms_hyperv_info {

extern struct ms_hyperv_info ms_hyperv;


typedef int (*hyperv_fill_flush_list_func)(
		struct hv_guest_mapping_flush_list *flush,
		void *data);

/*
 * Generate the guest ID.
 */
@ -348,6 +353,11 @@ void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
int hyperv_flush_guest_mapping(u64 as);
int hyperv_flush_guest_mapping_range(u64 as,
		hyperv_fill_flush_list_func fill_func, void *data);
int hyperv_fill_flush_guest_mapping_list(
		struct hv_guest_mapping_flush_list *flush,
		u64 start_gfn, u64 end_gfn);

#ifdef CONFIG_X86_64
void hv_apic_init(void);
@ -370,6 +380,11 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
	return NULL;
}
static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
static inline int hyperv_flush_guest_mapping_range(u64 as,
		hyperv_fill_flush_list_func fill_func, void *data)
{
	return -1;
}
#endif /* CONFIG_HYPERV */

#ifdef CONFIG_HYPERV_TSCPAGE

@ -121,7 +121,43 @@
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6

#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
#define RTIT_CTL_CYCLEACC BIT(1)
#define RTIT_CTL_OS BIT(2)
#define RTIT_CTL_USR BIT(3)
#define RTIT_CTL_PWR_EVT_EN BIT(4)
#define RTIT_CTL_FUP_ON_PTW BIT(5)
#define RTIT_CTL_FABRIC_EN BIT(6)
#define RTIT_CTL_CR3EN BIT(7)
#define RTIT_CTL_TOPA BIT(8)
#define RTIT_CTL_MTC_EN BIT(9)
#define RTIT_CTL_TSC_EN BIT(10)
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_PTW_EN BIT(12)
#define RTIT_CTL_BRANCH_EN BIT(13)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
#define RTIT_CTL_PSB_FREQ_OFFSET 24
#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
#define RTIT_CTL_ADDR0_OFFSET 32
#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET)
#define RTIT_CTL_ADDR1_OFFSET 36
#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET)
#define RTIT_CTL_ADDR2_OFFSET 40
#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET)
#define RTIT_CTL_ADDR3_OFFSET 44
#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET)
#define MSR_IA32_RTIT_STATUS 0x00000571
#define RTIT_STATUS_FILTEREN BIT(0)
#define RTIT_STATUS_CONTEXTEN BIT(1)
#define RTIT_STATUS_TRIGGEREN BIT(2)
#define RTIT_STATUS_BUFFOVF BIT(3)
#define RTIT_STATUS_ERROR BIT(4)
#define RTIT_STATUS_STOPPED BIT(5)
#define RTIT_STATUS_BYTECNT_OFFSET 32
#define RTIT_STATUS_BYTECNT (0x1ffffull << RTIT_STATUS_BYTECNT_OFFSET)
#define MSR_IA32_RTIT_ADDR0_A 0x00000580
#define MSR_IA32_RTIT_ADDR0_B 0x00000581
#define MSR_IA32_RTIT_ADDR1_A 0x00000582
@ -772,6 +808,7 @@
#define VMX_BASIC_INOUT 0x0040000000000000LLU

/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */

@ -290,11 +290,4 @@ struct __attribute__ ((__packed__)) vmcb {

#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)

#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
#define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
#define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"

#endif

@ -42,6 +42,20 @@ TRACE_EVENT(hyperv_nested_flush_guest_mapping,
	TP_printk("address space %llx ret %d", __entry->as, __entry->ret)
	);

TRACE_EVENT(hyperv_nested_flush_guest_mapping_range,
	TP_PROTO(u64 as, int ret),
	TP_ARGS(as, ret),

	TP_STRUCT__entry(
		__field(u64, as)
		__field(int, ret)
		),
	TP_fast_assign(__entry->as = as;
		__entry->ret = ret;
		),
	TP_printk("address space %llx ret %d", __entry->as, __entry->ret)
	);

TRACE_EVENT(hyperv_send_ipi_mask,
	TP_PROTO(const struct cpumask *cpus,
		int vector),

@ -77,7 +77,10 @@
#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000
#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000
#define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_PT_USE_GPA 0x01000000
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000

#define PIN_BASED_EXT_INTR_MASK 0x00000001
@ -98,6 +101,8 @@
#define VM_EXIT_LOAD_IA32_EFER 0x00200000
#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_PT_CONCEAL_PIP 0x01000000
#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000

#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff

@ -109,6 +114,8 @@
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000
#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000

#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff

@ -240,6 +247,8 @@ enum vmcs_field {
	GUEST_PDPTR3_HIGH = 0x00002811,
	GUEST_BNDCFGS = 0x00002812,
	GUEST_BNDCFGS_HIGH = 0x00002813,
	GUEST_IA32_RTIT_CTL = 0x00002814,
	GUEST_IA32_RTIT_CTL_HIGH = 0x00002815,
	HOST_IA32_PAT = 0x00002c00,
	HOST_IA32_PAT_HIGH = 0x00002c01,
	HOST_IA32_EFER = 0x00002c02,

@ -1,19 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* KVM paravirtual clock driver. A clocksource implementation
   Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <linux/clocksource.h>

@ -16,7 +16,7 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
			i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
			hyperv.o page_track.o debugfs.o

kvm-intel-y += vmx.o pmu_intel.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
kvm-amd-y += svm.o pmu_amd.o

obj-$(CONFIG_KVM) += kvm.o

@ -67,9 +67,6 @@ u64 kvm_supported_xcr0(void)
|
||||
|
||||
#define F(x) bit(X86_FEATURE_##x)
|
||||
|
||||
/* For scattered features from cpufeatures.h; we currently expose none */
|
||||
#define KF(x) bit(KVM_CPUID_BIT_##x)
|
||||
|
||||
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
@ -337,6 +334,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
|
||||
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
|
||||
unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
|
||||
unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
|
||||
|
||||
/* cpuid 1.edx */
|
||||
const u32 kvm_cpuid_1_edx_x86_features =
|
||||
@ -380,8 +378,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
|
||||
/* cpuid 0x80000008.ebx */
|
||||
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
|
||||
F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
|
||||
F(AMD_SSB_NO);
|
||||
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
|
||||
F(AMD_SSB_NO) | F(AMD_STIBP);
|
||||
|
||||
/* cpuid 0xC0000001.edx */
|
||||
const u32 kvm_cpuid_C000_0001_edx_x86_features =
|
||||
@ -395,7 +393,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
|
||||
F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
|
||||
F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
|
||||
F(SHA_NI) | F(AVX512BW) | F(AVX512VL);
|
||||
F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
|
||||
|
||||
/* cpuid 0xD.1.eax */
|
||||
const u32 kvm_cpuid_D_1_eax_x86_features =
|
||||
@ -411,7 +409,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
/* cpuid 7.0.edx*/
|
||||
const u32 kvm_cpuid_7_0_edx_x86_features =
|
||||
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
|
||||
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
|
||||
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP);
|
||||
|
||||
/* all calls to cpuid_count() should be made on the same cpu */
|
||||
get_cpu();
|
||||
@ -426,7 +424,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
|
||||
switch (function) {
|
||||
case 0:
|
||||
entry->eax = min(entry->eax, (u32)0xd);
|
||||
entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd));
|
||||
break;
|
||||
case 1:
|
||||
entry->edx &= kvm_cpuid_1_edx_x86_features;
|
||||
@ -603,6 +601,23 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
}
|
||||
break;
|
||||
}
|
||||
/* Intel PT */
|
||||
case 0x14: {
|
||||
int t, times = entry->eax;
|
||||
|
||||
if (!f_intel_pt)
|
||||
break;
|
||||
|
||||
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
for (t = 1; t <= times; ++t) {
|
||||
if (*nent >= maxnent)
|
||||
goto out;
|
||||
do_cpuid_1_ent(&entry[t], function, t);
|
||||
entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
++*nent;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KVM_CPUID_SIGNATURE: {
|
||||
static const char signature[12] = "KVMKVMKVM\0\0";
|
||||
const u32 *sigptr = (const u32 *)signature;
|
||||
|
@ -38,6 +38,9 @@
|
||||
|
||||
#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
|
||||
|
||||
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
|
||||
bool vcpu_kick);
|
||||
|
||||
static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
|
||||
{
|
||||
return atomic64_read(&synic->sint[sint]);
|
||||
@ -158,59 +161,24 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
|
||||
return (synic->active) ? synic : NULL;
|
||||
}
|
||||
|
||||
static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
|
||||
u32 sint)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
|
||||
struct page *page;
|
||||
gpa_t gpa;
|
||||
struct hv_message *msg;
|
||||
struct hv_message_page *msg_page;
|
||||
|
||||
gpa = synic->msg_page & PAGE_MASK;
|
||||
page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
|
||||
if (is_error_page(page)) {
|
||||
vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
|
||||
gpa);
|
||||
return;
|
||||
}
|
||||
msg_page = kmap_atomic(page);
|
||||
|
||||
msg = &msg_page->sint_message[sint];
|
||||
msg->header.message_flags.msg_pending = 0;
|
||||
|
||||
kunmap_atomic(msg_page);
|
||||
kvm_release_page_dirty(page);
|
||||
kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
||||
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
||||
struct kvm_vcpu_hv_stimer *stimer;
|
||||
int gsi, idx, stimers_pending;
|
||||
int gsi, idx;
|
||||
|
||||
trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
|
||||
|
||||
if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
|
||||
synic_clear_sint_msg_pending(synic, sint);
|
||||
|
||||
/* Try to deliver pending Hyper-V SynIC timers messages */
|
||||
stimers_pending = 0;
|
||||
for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
|
||||
stimer = &hv_vcpu->stimer[idx];
|
||||
if (stimer->msg_pending &&
|
||||
(stimer->config & HV_STIMER_ENABLE) &&
|
||||
HV_STIMER_SINT(stimer->config) == sint) {
|
||||
set_bit(stimer->index,
|
||||
hv_vcpu->stimer_pending_bitmap);
|
||||
stimers_pending++;
|
||||
}
|
||||
if (stimer->msg_pending && stimer->config.enable &&
|
||||
!stimer->config.direct_mode &&
|
||||
stimer->config.sintx == sint)
|
||||
stimer_mark_pending(stimer, false);
|
||||
}
|
||||
if (stimers_pending)
|
||||
kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
|
||||
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
gsi = atomic_read(&synic->sint_to_gsi[sint]);
|
||||
@ -497,7 +465,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
|
||||
time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
|
||||
ktime_now = ktime_get();
|
||||
|
||||
if (stimer->config & HV_STIMER_PERIODIC) {
|
||||
if (stimer->config.periodic) {
|
||||
if (stimer->exp_time) {
|
||||
if (time_now >= stimer->exp_time) {
|
||||
u64 remainder;
|
||||
@ -546,13 +514,18 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
|
||||
static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
|
||||
bool host)
|
||||
{
|
||||
union hv_stimer_config new_config = {.as_uint64 = config},
|
||||
old_config = {.as_uint64 = stimer->config.as_uint64};
|
||||
|
||||
trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index, config, host);
|
||||
|
||||
stimer_cleanup(stimer);
|
||||
if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
|
||||
config &= ~HV_STIMER_ENABLE;
|
||||
stimer->config = config;
|
||||
if (old_config.enable &&
|
||||
!new_config.direct_mode && new_config.sintx == 0)
|
||||
new_config.enable = 0;
|
||||
stimer->config.as_uint64 = new_config.as_uint64;
|
||||
|
||||
stimer_mark_pending(stimer, false);
|
||||
return 0;
|
||||
}
|
||||
@ -566,16 +539,16 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
|
||||
stimer_cleanup(stimer);
|
||||
stimer->count = count;
|
||||
if (stimer->count == 0)
|
||||
stimer->config &= ~HV_STIMER_ENABLE;
|
||||
else if (stimer->config & HV_STIMER_AUTOENABLE)
|
||||
stimer->config |= HV_STIMER_ENABLE;
|
||||
stimer->config.enable = 0;
|
||||
else if (stimer->config.auto_enable)
|
||||
stimer->config.enable = 1;
|
||||
stimer_mark_pending(stimer, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
|
||||
{
|
||||
*pconfig = stimer->config;
|
||||
*pconfig = stimer->config.as_uint64;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -586,44 +559,60 @@ static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
|
||||
}
|
||||
|
||||
static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
|
||||
struct hv_message *src_msg)
|
||||
struct hv_message *src_msg, bool no_retry)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
|
||||
struct page *page;
|
||||
gpa_t gpa;
|
||||
struct hv_message *dst_msg;
|
||||
int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
|
||||
gfn_t msg_page_gfn;
|
||||
struct hv_message_header hv_hdr;
|
||||
int r;
|
||||
struct hv_message_page *msg_page;
|
||||
|
||||
if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
|
||||
return -ENOENT;
|
||||
|
||||
gpa = synic->msg_page & PAGE_MASK;
|
||||
page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
|
||||
if (is_error_page(page))
|
||||
return -EFAULT;
|
||||
msg_page_gfn = synic->msg_page >> PAGE_SHIFT;
|
||||
|
||||
msg_page = kmap_atomic(page);
|
||||
dst_msg = &msg_page->sint_message[sint];
|
||||
if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
|
||||
src_msg->header.message_type) != HVMSG_NONE) {
|
||||
dst_msg->header.message_flags.msg_pending = 1;
|
||||
r = -EAGAIN;
|
||||
} else {
|
||||
memcpy(&dst_msg->u.payload, &src_msg->u.payload,
|
||||
src_msg->header.payload_size);
|
||||
dst_msg->header.message_type = src_msg->header.message_type;
|
||||
dst_msg->header.payload_size = src_msg->header.payload_size;
|
||||
r = synic_set_irq(synic, sint);
|
||||
if (r >= 1)
|
||||
r = 0;
|
||||
else if (r == 0)
|
||||
r = -EFAULT;
|
||||
/*
|
||||
* Strictly following the spec-mandated ordering would assume setting
|
||||
* .msg_pending before checking .message_type. However, this function
|
||||
* is only called in vcpu context so the entire update is atomic from
|
||||
* guest POV and thus the exact order here doesn't matter.
|
||||
*/
|
||||
r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
|
||||
msg_off + offsetof(struct hv_message,
|
||||
header.message_type),
|
||||
sizeof(hv_hdr.message_type));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (hv_hdr.message_type != HVMSG_NONE) {
|
||||
if (no_retry)
|
||||
return 0;
|
||||
|
||||
hv_hdr.message_flags.msg_pending = 1;
|
||||
r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
|
||||
&hv_hdr.message_flags,
|
||||
msg_off +
|
||||
offsetof(struct hv_message,
|
||||
header.message_flags),
|
||||
sizeof(hv_hdr.message_flags));
|
||||
if (r < 0)
|
||||
return r;
|
||||
return -EAGAIN;
|
||||
}
|
||||
kunmap_atomic(msg_page);
|
||||
kvm_release_page_dirty(page);
|
||||
kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
|
||||
return r;
|
||||
|
||||
r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
|
||||
sizeof(src_msg->header) +
|
||||
src_msg->header.payload_size);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = synic_set_irq(synic, sint);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0)
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
|
||||
@ -633,24 +622,45 @@ static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
|
||||
struct hv_timer_message_payload *payload =
|
||||
(struct hv_timer_message_payload *)&msg->u.payload;
|
||||
|
||||
/*
|
||||
* To avoid piling up periodic ticks, don't retry message
|
||||
* delivery for them (within "lazy" lost ticks policy).
|
||||
*/
|
||||
bool no_retry = stimer->config.periodic;
|
||||
|
||||
payload->expiration_time = stimer->exp_time;
|
||||
payload->delivery_time = get_time_ref_counter(vcpu->kvm);
|
||||
return synic_deliver_msg(vcpu_to_synic(vcpu),
|
||||
HV_STIMER_SINT(stimer->config), msg);
|
||||
stimer->config.sintx, msg,
|
||||
no_retry);
|
||||
}
|
||||
|
||||
static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
||||
struct kvm_lapic_irq irq = {
|
||||
.delivery_mode = APIC_DM_FIXED,
|
||||
.vector = stimer->config.apic_vector
|
||||
};
|
||||
|
||||
return !kvm_apic_set_irq(vcpu, &irq, NULL);
|
||||
}
|
||||
|
||||
static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
|
||||
{
|
||||
int r;
|
||||
int r, direct = stimer->config.direct_mode;
|
||||
|
||||
stimer->msg_pending = true;
|
||||
r = stimer_send_msg(stimer);
|
||||
if (!direct)
|
||||
r = stimer_send_msg(stimer);
|
||||
else
|
||||
r = stimer_notify_direct(stimer);
|
||||
trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
|
||||
stimer->index, r);
|
||||
stimer->index, direct, r);
|
||||
if (!r) {
|
||||
stimer->msg_pending = false;
|
||||
if (!(stimer->config & HV_STIMER_PERIODIC))
|
||||
stimer->config &= ~HV_STIMER_ENABLE;
|
||||
if (!(stimer->config.periodic))
|
||||
stimer->config.enable = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -664,7 +674,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
|
||||
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
|
||||
if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
|
||||
stimer = &hv_vcpu->stimer[i];
|
||||
if (stimer->config & HV_STIMER_ENABLE) {
|
||||
if (stimer->config.enable) {
|
||||
exp_time = stimer->exp_time;
|
||||
|
||||
if (exp_time) {
|
||||
@ -674,7 +684,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
|
||||
stimer_expiration(stimer);
|
||||
}
|
||||
|
||||
if ((stimer->config & HV_STIMER_ENABLE) &&
|
||||
if ((stimer->config.enable) &&
|
||||
stimer->count) {
|
||||
if (!stimer->msg_pending)
|
||||
stimer_start(stimer);
|
||||
@ -815,9 +825,9 @@ static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
|
||||
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
|
||||
|
||||
if (host)
|
||||
hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
|
||||
hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;
|
||||
|
||||
if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
|
||||
if (!host && (data & HV_CRASH_CTL_CRASH_NOTIFY)) {
|
||||
|
||||
vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
|
||||
hv->hv_crash_param[0],
|
||||
@ -1758,3 +1768,124 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
|
||||
return kvm_hv_eventfd_deassign(kvm, args->conn_id);
|
||||
return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
|
||||
}
|
||||
|
||||
int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
|
||||
struct kvm_cpuid_entry2 __user *entries)
|
||||
{
|
||||
uint16_t evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu);
|
||||
struct kvm_cpuid_entry2 cpuid_entries[] = {
|
||||
{ .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
|
||||
{ .function = HYPERV_CPUID_INTERFACE },
|
||||
{ .function = HYPERV_CPUID_VERSION },
|
||||
{ .function = HYPERV_CPUID_FEATURES },
|
||||
{ .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
|
||||
{ .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
|
||||
{ .function = HYPERV_CPUID_NESTED_FEATURES },
|
||||
};
|
||||
int i, nent = ARRAY_SIZE(cpuid_entries);
|
||||
|
||||
/* Skip NESTED_FEATURES if eVMCS is not supported */
|
||||
if (!evmcs_ver)
|
||||
--nent;
|
||||
|
||||
if (cpuid->nent < nent)
|
||||
return -E2BIG;
|
||||
|
||||
if (cpuid->nent > nent)
|
||||
cpuid->nent = nent;
|
||||
|
||||
for (i = 0; i < nent; i++) {
|
||||
struct kvm_cpuid_entry2 *ent = &cpuid_entries[i];
|
||||
u32 signature[3];
|
||||
|
||||
switch (ent->function) {
|
||||
case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
|
||||
memcpy(signature, "Linux KVM Hv", 12);
|
||||
|
||||
ent->eax = HYPERV_CPUID_NESTED_FEATURES;
|
||||
ent->ebx = signature[0];
|
||||
ent->ecx = signature[1];
|
||||
ent->edx = signature[2];
|
||||
break;
|
||||
|
||||
case HYPERV_CPUID_INTERFACE:
|
||||
memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
|
||||
ent->eax = signature[0];
|
||||
break;
|
||||
|
||||
case HYPERV_CPUID_VERSION:
|
||||
/*
|
||||
* We implement some Hyper-V 2016 functions so let's use
|
||||
* this version.
|
||||
*/
|
||||
ent->eax = 0x00003839;
|
||||
ent->ebx = 0x000A0000;
|
||||
break;
|
||||
|
||||
case HYPERV_CPUID_FEATURES:
|
||||
ent->eax |= HV_X64_MSR_VP_RUNTIME_AVAILABLE;
|
||||
ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
|
||||
ent->eax |= HV_X64_MSR_SYNIC_AVAILABLE;
|
||||
ent->eax |= HV_MSR_SYNTIMER_AVAILABLE;
|
||||
ent->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
|
||||
ent->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
|
||||
ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE;
|
||||
ent->eax |= HV_X64_MSR_RESET_AVAILABLE;
|
||||
ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
|
||||
ent->eax |= HV_X64_MSR_GUEST_IDLE_AVAILABLE;
|
||||
ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS;
|
||||
ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT;
|
||||
|
||||
ent->ebx |= HV_X64_POST_MESSAGES;
|
||||
ent->ebx |= HV_X64_SIGNAL_EVENTS;
|
||||
|
||||
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
|
||||
ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
|
||||
ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
|
||||
|
||||
break;
|
||||
|
||||
case HYPERV_CPUID_ENLIGHTMENT_INFO:
|
||||
ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
|
||||
ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
|
||||
ent->eax |= HV_X64_SYSTEM_RESET_RECOMMENDED;
|
||||
ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
|
||||
ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
|
||||
ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
|
||||
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
|
||||
|
||||
/*
|
||||
* Default number of spinlock retry attempts, matches
|
||||
* HyperV 2016.
|
||||
*/
|
||||
ent->ebx = 0x00000FFF;
|
||||
|
||||
break;
|
||||
|
||||
case HYPERV_CPUID_IMPLEMENT_LIMITS:
|
||||
/* Maximum number of virtual processors */
|
||||
ent->eax = KVM_MAX_VCPUS;
|
||||
/*
|
||||
* Maximum number of logical processors, matches
|
||||
* HyperV 2016.
|
||||
*/
|
||||
ent->ebx = 64;
|
||||
|
||||
break;
|
||||
|
||||
case HYPERV_CPUID_NESTED_FEATURES:
|
||||
ent->eax = evmcs_ver;
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (copy_to_user(entries, cpuid_entries,
|
||||
nent * sizeof(struct kvm_cpuid_entry2)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -24,6 +24,8 @@
#ifndef __ARCH_X86_KVM_HYPERV_H__
#define __ARCH_X86_KVM_HYPERV_H__

#include <linux/kvm_host.h>

static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu)
{
	return &vcpu->arch.hyperv;
@ -95,5 +97,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
void kvm_hv_init_vm(struct kvm *kvm);
void kvm_hv_destroy_vm(struct kvm *kvm);
int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
		struct kvm_cpuid_entry2 __user *entries);

#endif

@ -2,6 +2,8 @@
#ifndef ASM_KVM_CACHE_REGS_H
#define ASM_KVM_CACHE_REGS_H

#include <linux/kvm_host.h>

#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR4_GUEST_BITS \
	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \

@ -251,10 +251,9 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)

	if (enabled != apic->sw_enabled) {
		apic->sw_enabled = enabled;
		if (enabled) {
		if (enabled)
			static_key_slow_dec_deferred(&apic_sw_disabled);
			recalculate_apic_map(apic->vcpu->kvm);
		} else
		else
			static_key_slow_inc(&apic_sw_disabled.key);
	}
}

@ -264,6 +264,35 @@ static void mmu_spte_set(u64 *sptep, u64 spte);
|
||||
static union kvm_mmu_page_role
|
||||
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
||||
static inline bool kvm_available_flush_tlb_with_range(void)
|
||||
{
|
||||
return kvm_x86_ops->tlb_remote_flush_with_range;
|
||||
}
|
||||
|
||||
static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm,
|
||||
struct kvm_tlb_range *range)
|
||||
{
|
||||
int ret = -ENOTSUPP;
|
||||
|
||||
if (range && kvm_x86_ops->tlb_remote_flush_with_range)
|
||||
ret = kvm_x86_ops->tlb_remote_flush_with_range(kvm, range);
|
||||
|
||||
if (ret)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
|
||||
u64 start_gfn, u64 pages)
|
||||
{
|
||||
struct kvm_tlb_range range;
|
||||
|
||||
range.start_gfn = start_gfn;
|
||||
range.pages = pages;
|
||||
|
||||
kvm_flush_remote_tlbs_with_range(kvm, &range);
|
||||
}
|
||||
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
|
||||
{
|
||||
BUG_ON((mmio_mask & mmio_value) != mmio_value);
|
||||
@ -1456,8 +1485,12 @@ static bool __drop_large_spte(struct kvm *kvm, u64 *sptep)
|
||||
|
||||
static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
|
||||
{
|
||||
if (__drop_large_spte(vcpu->kvm, sptep))
|
||||
kvm_flush_remote_tlbs(vcpu->kvm);
|
||||
if (__drop_large_spte(vcpu->kvm, sptep)) {
|
||||
struct kvm_mmu_page *sp = page_header(__pa(sptep));
|
||||
|
||||
kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
|
||||
KVM_PAGES_PER_HPAGE(sp->role.level));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1743,10 +1776,12 @@ restart:
|
||||
}
|
||||
}
|
||||
|
||||
if (need_flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
if (need_flush && kvm_available_flush_tlb_with_range()) {
|
||||
kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return need_flush;
|
||||
}
|
||||
|
||||
struct slot_rmap_walk_iterator {
|
||||
@ -1880,9 +1915,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
|
||||
}
|
||||
|
||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
{
|
||||
kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
|
||||
return kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
|
||||
}
|
||||
|
||||
static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
|
||||
@ -1925,7 +1960,8 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
|
||||
rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
|
||||
|
||||
kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0);
|
||||
kvm_flush_remote_tlbs(vcpu->kvm);
|
||||
kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
|
||||
KVM_PAGES_PER_HPAGE(sp->role.level));
|
||||
}
|
||||
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
@ -2441,7 +2477,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
|
||||
account_shadowed(vcpu->kvm, sp);
|
||||
if (level == PT_PAGE_TABLE_LEVEL &&
|
||||
rmap_write_protect(vcpu, gfn))
|
||||
kvm_flush_remote_tlbs(vcpu->kvm);
|
||||
kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1);
|
||||
|
||||
if (level > PT_PAGE_TABLE_LEVEL && need_sync)
|
||||
flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
|
||||
@ -2561,7 +2597,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|
||||
return;
|
||||
|
||||
drop_parent_pte(child, sptep);
|
||||
kvm_flush_remote_tlbs(vcpu->kvm);
|
||||
kvm_flush_remote_tlbs_with_address(vcpu->kvm, child->gfn, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2985,8 +3021,10 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
|
||||
ret = RET_PF_EMULATE;
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
|
||||
}
|
||||
|
||||
if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush)
|
||||
kvm_flush_remote_tlbs(vcpu->kvm);
|
||||
kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn,
|
||||
KVM_PAGES_PER_HPAGE(level));
|
||||
|
||||
if (unlikely(is_mmio_spte(*sptep)))
|
||||
ret = RET_PF_EMULATE;
|
||||
@ -5586,8 +5624,13 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
bool flush_tlb = true;
|
||||
bool flush = false;
|
||||
int i;
|
||||
|
||||
if (kvm_available_flush_tlb_with_range())
|
||||
flush_tlb = false;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
slots = __kvm_memslots(kvm, i);
|
||||
@ -5599,12 +5642,17 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
|
||||
if (start >= end)
|
||||
continue;
|
||||
|
||||
slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
|
||||
PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL,
|
||||
start, end - 1, true);
|
||||
flush |= slot_handle_level_range(kvm, memslot,
|
||||
kvm_zap_rmapp, PT_PAGE_TABLE_LEVEL,
|
||||
PT_MAX_HUGEPAGE_LEVEL, start,
|
||||
end - 1, flush_tlb);
|
||||
}
|
||||
}
|
||||
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
|
||||
gfn_end - gfn_start + 1);
|
||||
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
@ -5638,12 +5686,13 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
|
||||
* spte from present to present (changing the spte from present
|
||||
* to nonpresent will flush all the TLBs immediately), in other
|
||||
* words, the only case we care is mmu_spte_update() where we
|
||||
* haved checked SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE
|
||||
* have checked SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE
|
||||
* instead of PT_WRITABLE_MASK, that means it does not depend
|
||||
* on PT_WRITABLE_MASK anymore.
|
||||
*/
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
|
||||
memslot->npages);
|
||||
}
|
||||
|
||||
static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
|
||||
@ -5671,7 +5720,13 @@ restart:
|
||||
!kvm_is_reserved_pfn(pfn) &&
|
||||
PageTransCompoundMap(pfn_to_page(pfn))) {
|
||||
pte_list_remove(rmap_head, sptep);
|
||||
need_tlb_flush = 1;
|
||||
|
||||
if (kvm_available_flush_tlb_with_range())
|
||||
kvm_flush_remote_tlbs_with_address(kvm, sp->gfn,
|
||||
KVM_PAGES_PER_HPAGE(sp->role.level));
|
||||
else
|
||||
need_tlb_flush = 1;
|
||||
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
@ -5707,7 +5762,8 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
|
||||
* dirty_bitmap.
|
||||
*/
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
|
||||
memslot->npages);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
|
||||
|
||||
@ -5725,7 +5781,8 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
|
||||
memslot->npages);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
|
||||
|
||||
@ -5742,7 +5799,8 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
|
||||
|
||||
/* see kvm_mmu_slot_leaf_clear_dirty */
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
|
||||
memslot->npages);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
|
||||
|
||||
|
@ -894,7 +894,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
		pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

		if (mmu_page_zap_pte(vcpu->kvm, sp, sptep))
			kvm_flush_remote_tlbs(vcpu->kvm);
			kvm_flush_remote_tlbs_with_address(vcpu->kvm,
				sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));

		if (!rmap_can_add(vcpu))
			break;

@ -675,11 +675,6 @@ struct svm_cpu_data {
|
||||
|
||||
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
|
||||
|
||||
struct svm_init_data {
|
||||
int cpu;
|
||||
int r;
|
||||
};
|
||||
|
||||
static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
|
||||
|
||||
#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
|
||||
@ -711,17 +706,17 @@ static u32 svm_msrpm_offset(u32 msr)
|
||||
|
||||
static inline void clgi(void)
|
||||
{
|
||||
asm volatile (__ex(SVM_CLGI));
|
||||
asm volatile (__ex("clgi"));
|
||||
}
|
||||
|
||||
static inline void stgi(void)
|
||||
{
|
||||
asm volatile (__ex(SVM_STGI));
|
||||
asm volatile (__ex("stgi"));
|
||||
}
|
||||
|
||||
static inline void invlpga(unsigned long addr, u32 asid)
|
||||
{
|
||||
asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
|
||||
asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr));
|
||||
}
|
||||
|
||||
static int get_npt_level(struct kvm_vcpu *vcpu)
|
||||
@ -1456,10 +1451,11 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
|
||||
g_tsc_offset = svm->vmcb->control.tsc_offset -
|
||||
svm->nested.hsave->control.tsc_offset;
|
||||
svm->nested.hsave->control.tsc_offset = offset;
|
||||
} else
|
||||
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
|
||||
svm->vmcb->control.tsc_offset,
|
||||
offset);
|
||||
}
|
||||
|
||||
trace_kvm_write_tsc_offset(vcpu->vcpu_id,
|
||||
svm->vmcb->control.tsc_offset - g_tsc_offset,
|
||||
offset);
|
||||
|
||||
svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
|
||||
|
||||
@ -2129,6 +2125,13 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
goto out;
|
||||
}
|
||||
|
||||
svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL);
|
||||
if (!svm->vcpu.arch.guest_fpu) {
|
||||
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
|
||||
err = -ENOMEM;
|
||||
goto free_partial_svm;
|
||||
}
|
||||
|
||||
err = kvm_vcpu_init(&svm->vcpu, kvm, id);
|
||||
if (err)
|
||||
goto free_svm;
|
||||
@ -2188,6 +2191,8 @@ free_page1:
|
||||
uninit:
|
||||
kvm_vcpu_uninit(&svm->vcpu);
|
||||
free_svm:
|
||||
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
|
||||
free_partial_svm:
|
||||
kmem_cache_free(kvm_vcpu_cache, svm);
|
||||
out:
|
||||
return ERR_PTR(err);
|
||||
@ -2217,6 +2222,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
|
||||
__free_page(virt_to_page(svm->nested.hsave));
|
||||
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
|
||||
kmem_cache_free(kvm_vcpu_cache, svm);
|
||||
}
|
||||
|
||||
@ -3278,6 +3284,8 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
|
||||
dst->event_inj_err = from->event_inj_err;
|
||||
dst->nested_cr3 = from->nested_cr3;
|
||||
dst->virt_ext = from->virt_ext;
|
||||
dst->pause_filter_count = from->pause_filter_count;
|
||||
dst->pause_filter_thresh = from->pause_filter_thresh;
|
||||
}
|
||||
|
||||
static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
@ -3356,6 +3364,11 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
nested_vmcb->control.event_inj = 0;
|
||||
nested_vmcb->control.event_inj_err = 0;
|
||||
|
||||
nested_vmcb->control.pause_filter_count =
|
||||
svm->vmcb->control.pause_filter_count;
|
||||
nested_vmcb->control.pause_filter_thresh =
|
||||
svm->vmcb->control.pause_filter_thresh;
|
||||
|
||||
/* We always set V_INTR_MASKING and remember the old value in hflags */
|
||||
if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
|
||||
nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
|
||||
@ -3532,6 +3545,11 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
|
||||
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
|
||||
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
|
||||
|
||||
svm->vmcb->control.pause_filter_count =
|
||||
nested_vmcb->control.pause_filter_count;
|
||||
svm->vmcb->control.pause_filter_thresh =
|
||||
nested_vmcb->control.pause_filter_thresh;
|
||||
|
||||
nested_svm_unmap(page);
|
||||
|
||||
/* Enter Guest-Mode */
|
||||
@ -5636,9 +5654,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
/* Enter guest mode */
|
||||
"push %%" _ASM_AX " \n\t"
|
||||
"mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
|
||||
__ex(SVM_VMLOAD) "\n\t"
|
||||
__ex(SVM_VMRUN) "\n\t"
|
||||
__ex(SVM_VMSAVE) "\n\t"
|
||||
__ex("vmload %%" _ASM_AX) "\n\t"
|
||||
__ex("vmrun %%" _ASM_AX) "\n\t"
|
||||
__ex("vmsave %%" _ASM_AX) "\n\t"
|
||||
"pop %%" _ASM_AX " \n\t"
|
||||
|
||||
/* Save guest registers, load host registers */
|
||||
@ -5836,6 +5854,13 @@ static bool svm_cpu_has_accelerated_tpr(void)
|
||||
|
||||
static bool svm_has_emulated_msr(int index)
|
||||
{
|
||||
switch (index) {
|
||||
case MSR_IA32_MCG_EXT_CTL:
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -5924,6 +5949,11 @@ static bool svm_umip_emulated(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool svm_pt_supported(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool svm_has_wbinvd_exit(void)
|
||||
{
|
||||
return true;
|
||||
@ -7053,6 +7083,12 @@ failed:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Not supported */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
|
||||
uint16_t *vmcs_version)
|
||||
{
|
||||
@ -7159,6 +7195,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
||||
.mpx_supported = svm_mpx_supported,
|
||||
.xsaves_supported = svm_xsaves_supported,
|
||||
.umip_emulated = svm_umip_emulated,
|
||||
.pt_supported = svm_pt_supported,
|
||||
|
||||
.set_supported_cpuid = svm_set_supported_cpuid,
|
||||
|
||||
@ -7191,6 +7228,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
||||
.mem_enc_unreg_region = svm_unregister_enc_region,
|
||||
|
||||
.nested_enable_evmcs = nested_enable_evmcs,
|
||||
.nested_get_evmcs_version = nested_get_evmcs_version,
|
||||
};
|
||||
|
||||
static int __init svm_init(void)
|
||||
|
@ -1254,24 +1254,26 @@ TRACE_EVENT(kvm_hv_stimer_callback,
 * Tracepoint for stimer_expiration.
 */
TRACE_EVENT(kvm_hv_stimer_expiration,
	TP_PROTO(int vcpu_id, int timer_index, int msg_send_result),
	TP_ARGS(vcpu_id, timer_index, msg_send_result),
	TP_PROTO(int vcpu_id, int timer_index, int direct, int msg_send_result),
	TP_ARGS(vcpu_id, timer_index, direct, msg_send_result),

	TP_STRUCT__entry(
		__field(int, vcpu_id)
		__field(int, timer_index)
		__field(int, direct)
		__field(int, msg_send_result)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu_id;
		__entry->timer_index = timer_index;
		__entry->direct = direct;
		__entry->msg_send_result = msg_send_result;
	),

	TP_printk("vcpu_id %d timer %d msg send result %d",
	TP_printk("vcpu_id %d timer %d direct %d send result %d",
		__entry->vcpu_id, __entry->timer_index,
		__entry->msg_send_result)
		__entry->direct, __entry->msg_send_result)
);

/*

15252 arch/x86/kvm/vmx.c
File diff suppressed because it is too large
343 arch/x86/kvm/vmx/capabilities.h Normal file
@ -0,0 +1,343 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __KVM_X86_VMX_CAPS_H
|
||||
#define __KVM_X86_VMX_CAPS_H
|
||||
|
||||
#include "lapic.h"
|
||||
|
||||
extern bool __read_mostly enable_vpid;
|
||||
extern bool __read_mostly flexpriority_enabled;
|
||||
extern bool __read_mostly enable_ept;
|
||||
extern bool __read_mostly enable_unrestricted_guest;
|
||||
extern bool __read_mostly enable_ept_ad_bits;
|
||||
extern bool __read_mostly enable_pml;
|
||||
extern int __read_mostly pt_mode;
|
||||
|
||||
#define PT_MODE_SYSTEM 0
|
||||
#define PT_MODE_HOST_GUEST 1
|
||||
|
||||
struct nested_vmx_msrs {
|
||||
/*
|
||||
* We only store the "true" versions of the VMX capability MSRs. We
|
||||
* generate the "non-true" versions by setting the must-be-1 bits
|
||||
* according to the SDM.
|
||||
*/
|
||||
u32 procbased_ctls_low;
|
||||
u32 procbased_ctls_high;
|
||||
	u32 secondary_ctls_low;
	u32 secondary_ctls_high;
	u32 pinbased_ctls_low;
	u32 pinbased_ctls_high;
	u32 exit_ctls_low;
	u32 exit_ctls_high;
	u32 entry_ctls_low;
	u32 entry_ctls_high;
	u32 misc_low;
	u32 misc_high;
	u32 ept_caps;
	u32 vpid_caps;
	u64 basic;
	u64 cr0_fixed0;
	u64 cr0_fixed1;
	u64 cr4_fixed0;
	u64 cr4_fixed1;
	u64 vmcs_enum;
	u64 vmfunc_controls;
};

struct vmcs_config {
	int size;
	int order;
	u32 basic_cap;
	u32 revision_id;
	u32 pin_based_exec_ctrl;
	u32 cpu_based_exec_ctrl;
	u32 cpu_based_2nd_exec_ctrl;
	u32 vmexit_ctrl;
	u32 vmentry_ctrl;
	struct nested_vmx_msrs nested;
};
extern struct vmcs_config vmcs_config;

struct vmx_capability {
	u32 ept;
	u32 vpid;
};
extern struct vmx_capability vmx_capability;

static inline bool cpu_has_vmx_basic_inout(void)
{
	return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
}

static inline bool cpu_has_virtual_nmis(void)
{
	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
}

static inline bool cpu_has_vmx_preemption_timer(void)
{
	return vmcs_config.pin_based_exec_ctrl &
		PIN_BASED_VMX_PREEMPTION_TIMER;
}

static inline bool cpu_has_vmx_posted_intr(void)
{
	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
		vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
}

static inline bool cpu_has_load_ia32_efer(void)
{
	return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_EFER) &&
	       (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_EFER);
}

static inline bool cpu_has_load_perf_global_ctrl(void)
{
	return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
	       (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
}

static inline bool vmx_mpx_supported(void)
{
	return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
	       (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
}

static inline bool cpu_has_vmx_tpr_shadow(void)
{
	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
}

static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu)
{
	return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu);
}

static inline bool cpu_has_vmx_msr_bitmap(void)
{
	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
}

static inline bool cpu_has_secondary_exec_ctrls(void)
{
	return vmcs_config.cpu_based_exec_ctrl &
		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
}

static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}

static inline bool cpu_has_vmx_ept(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_EPT;
}

static inline bool vmx_umip_emulated(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_DESC;
}

static inline bool cpu_has_vmx_rdtscp(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_RDTSCP;
}

static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
}

static inline bool cpu_has_vmx_vpid(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_VPID;
}

static inline bool cpu_has_vmx_wbinvd_exit(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_WBINVD_EXITING;
}

static inline bool cpu_has_vmx_unrestricted_guest(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_UNRESTRICTED_GUEST;
}

static inline bool cpu_has_vmx_apic_register_virt(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_APIC_REGISTER_VIRT;
}

static inline bool cpu_has_vmx_virtual_intr_delivery(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
}

static inline bool cpu_has_vmx_ple(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_PAUSE_LOOP_EXITING;
}

static inline bool vmx_rdrand_supported(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_RDRAND_EXITING;
}

static inline bool cpu_has_vmx_invpcid(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_INVPCID;
}

static inline bool cpu_has_vmx_vmfunc(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_VMFUNC;
}

static inline bool cpu_has_vmx_shadow_vmcs(void)
{
	u64 vmx_msr;

	/* check if the cpu supports writing r/o exit information fields */
	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
	if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
		return false;

	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_SHADOW_VMCS;
}

static inline bool cpu_has_vmx_encls_vmexit(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENCLS_EXITING;
}

static inline bool vmx_rdseed_supported(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_RDSEED_EXITING;
}

static inline bool cpu_has_vmx_pml(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
}

static inline bool vmx_xsaves_supported(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_XSAVES;
}

static inline bool cpu_has_vmx_tsc_scaling(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_TSC_SCALING;
}

static inline bool cpu_has_vmx_apicv(void)
{
	return cpu_has_vmx_apic_register_virt() &&
		cpu_has_vmx_virtual_intr_delivery() &&
		cpu_has_vmx_posted_intr();
}

static inline bool cpu_has_vmx_flexpriority(void)
{
	return cpu_has_vmx_tpr_shadow() &&
		cpu_has_vmx_virtualize_apic_accesses();
}

static inline bool cpu_has_vmx_ept_execute_only(void)
{
	return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
}

static inline bool cpu_has_vmx_ept_4levels(void)
{
	return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
}

static inline bool cpu_has_vmx_ept_5levels(void)
{
	return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT;
}

static inline bool cpu_has_vmx_ept_mt_wb(void)
{
	return vmx_capability.ept & VMX_EPTP_WB_BIT;
}

static inline bool cpu_has_vmx_ept_2m_page(void)
{
	return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
}

static inline bool cpu_has_vmx_ept_1g_page(void)
{
	return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
}

static inline bool cpu_has_vmx_ept_ad_bits(void)
{
	return vmx_capability.ept & VMX_EPT_AD_BIT;
}

static inline bool cpu_has_vmx_invept_context(void)
{
	return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_invept_global(void)
{
	return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
}

static inline bool cpu_has_vmx_invvpid(void)
{
	return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
}

static inline bool cpu_has_vmx_invvpid_individual_addr(void)
{
	return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT;
}

static inline bool cpu_has_vmx_invvpid_single(void)
{
	return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_invvpid_global(void)
{
	return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_intel_pt(void)
{
	u64 vmx_msr;

	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
	return (vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT) &&
		(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) &&
		(vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_RTIT_CTL) &&
		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL);
}

#endif /* __KVM_X86_VMX_CAPS_H */
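A note on the helpers above: they only test bits that VMX setup code has already cached in vmcs_config, after intersecting the requested controls with what the IA32_VMX_* capability MSRs allow (low 32 bits report the allowed 0-settings, i.e. bits that must be 1; high 32 bits report the allowed 1-settings). The kernel's actual setup code is elsewhere and not shown in this diff; the following is only a standalone, user-space sketch of that adjust-controls rule, with hypothetical MSR and control values.

#include <stdint.h>
#include <stdio.h>

/*
 * Standalone sketch (not kernel code): intersect requested controls with
 * the capability MSR.  Low dword = bits that must be 1, high dword = bits
 * that may be 1.  All values below are made up for illustration.
 */
static int adjust_vmx_controls(uint32_t ctl_min, uint32_t ctl_opt,
			       uint64_t vmx_msr, uint32_t *result)
{
	uint32_t msr_low  = (uint32_t)vmx_msr;
	uint32_t msr_high = (uint32_t)(vmx_msr >> 32);
	uint32_t ctl = ctl_min | ctl_opt;

	ctl &= msr_high;	/* clear bits the CPU cannot set to 1 */
	ctl |= msr_low;		/* set bits the CPU cannot clear to 0 */

	if (ctl_min & ~ctl)
		return -1;	/* a required feature is unsupported */
	*result = ctl;
	return 0;
}

int main(void)
{
	uint32_t pin_based;
	uint64_t msr = ((uint64_t)0x0000007f << 32) | 0x00000016; /* hypothetical */

	if (adjust_vmx_controls(0x16, 0x40, msr, &pin_based) == 0)
		printf("pin_based_exec_ctrl = %#x\n", pin_based);
	return 0;
}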
@@ -1,20 +1,22 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_VMX_EVMCS_H
#define __KVM_X86_VMX_EVMCS_H
// SPDX-License-Identifier: GPL-2.0

#include <asm/hyperv-tlfs.h>
#include <linux/errno.h>
#include <linux/smp.h>

#include "evmcs.h"
#include "vmcs.h"
#include "vmx.h"

DEFINE_STATIC_KEY_FALSE(enable_evmcs);

#if IS_ENABLED(CONFIG_HYPERV)

#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
		{EVMCS1_OFFSET(name), clean_field}

struct evmcs_field {
	u16 offset;
	u16 clean_field;
};

static const struct evmcs_field vmcs_field_to_evmcs_1[] = {
const struct evmcs_field vmcs_field_to_evmcs_1[] = {
	/* 64 bit rw */
	EVMCS1_FIELD(GUEST_RIP, guest_rip,
		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
@@ -298,27 +300,53 @@ static const struct evmcs_field vmcs_field_to_evmcs_1[] = {
	EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id,
		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
};
const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);

static __always_inline int get_evmcs_offset(unsigned long field,
					    u16 *clean_field)
void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
{
	unsigned int index = ROL16(field, 6);
	const struct evmcs_field *evmcs_field;
	vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;

	if (unlikely(index >= ARRAY_SIZE(vmcs_field_to_evmcs_1))) {
		WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
			  field);
		return -ENOENT;
	}
	vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
	vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;

	evmcs_field = &vmcs_field_to_evmcs_1[index];
}
#endif

	if (clean_field)
		*clean_field = evmcs_field->clean_field;
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	/*
	 * vmcs_version represents the range of supported Enlightened VMCS
	 * versions: lower 8 bits is the minimal version, higher 8 bits is the
	 * maximum supported version. KVM supports versions from 1 to
	 * KVM_EVMCS_VERSION.
	 */
	if (vmx->nested.enlightened_vmcs_enabled)
		return (KVM_EVMCS_VERSION << 8) | 1;

	return evmcs_field->offset;
	return 0;
}

#undef ROL16
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
			uint16_t *vmcs_version)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

#endif /* __KVM_X86_VMX_EVMCS_H */
	if (vmcs_version)
		*vmcs_version = nested_get_evmcs_version(vcpu);

	/* We don't support disabling the feature for simplicity. */
	if (vmx->nested.enlightened_vmcs_enabled)
		return 0;

	vmx->nested.enlightened_vmcs_enabled = true;

	vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
	vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
	vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
	vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
	vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;

	return 0;
}
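The vmcs_version packing used by nested_get_evmcs_version() above (minimum supported eVMCS version in the low byte, maximum in the high byte) is easy to exercise in isolation. A minimal user-space sketch, reusing the KVM_EVMCS_VERSION constant from evmcs.h:

#include <stdint.h>
#include <stdio.h>

#define KVM_EVMCS_VERSION 1	/* matches the definition in evmcs.h */

/* Pack the supported eVMCS version range: low byte = min, high byte = max. */
static uint16_t pack_evmcs_version(uint8_t min, uint8_t max)
{
	return (uint16_t)(max << 8) | min;
}

int main(void)
{
	uint16_t v = pack_evmcs_version(1, KVM_EVMCS_VERSION);

	printf("min version: %u\n", v & 0xff);	/* -> 1 */
	printf("max version: %u\n", v >> 8);	/* -> KVM_EVMCS_VERSION */
	return 0;
}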
arch/x86/kvm/vmx/evmcs.h (new file, 202 lines)
@@ -0,0 +1,202 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __KVM_X86_VMX_EVMCS_H
|
||||
#define __KVM_X86_VMX_EVMCS_H
|
||||
|
||||
#include <linux/jump_label.h>
|
||||
|
||||
#include <asm/hyperv-tlfs.h>
|
||||
#include <asm/mshyperv.h>
|
||||
#include <asm/vmx.h>
|
||||
|
||||
#include "capabilities.h"
|
||||
#include "vmcs.h"
|
||||
|
||||
struct vmcs_config;
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(enable_evmcs);
|
||||
|
||||
#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
|
||||
|
||||
#define KVM_EVMCS_VERSION 1
|
||||
|
||||
/*
|
||||
* Enlightened VMCSv1 doesn't support these:
|
||||
*
|
||||
* POSTED_INTR_NV = 0x00000002,
|
||||
* GUEST_INTR_STATUS = 0x00000810,
|
||||
* APIC_ACCESS_ADDR = 0x00002014,
|
||||
* POSTED_INTR_DESC_ADDR = 0x00002016,
|
||||
* EOI_EXIT_BITMAP0 = 0x0000201c,
|
||||
* EOI_EXIT_BITMAP1 = 0x0000201e,
|
||||
* EOI_EXIT_BITMAP2 = 0x00002020,
|
||||
* EOI_EXIT_BITMAP3 = 0x00002022,
|
||||
* GUEST_PML_INDEX = 0x00000812,
|
||||
* PML_ADDRESS = 0x0000200e,
|
||||
* VM_FUNCTION_CONTROL = 0x00002018,
|
||||
* EPTP_LIST_ADDRESS = 0x00002024,
|
||||
* VMREAD_BITMAP = 0x00002026,
|
||||
* VMWRITE_BITMAP = 0x00002028,
|
||||
*
|
||||
* TSC_MULTIPLIER = 0x00002032,
|
||||
* PLE_GAP = 0x00004020,
|
||||
* PLE_WINDOW = 0x00004022,
|
||||
* VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
|
||||
* GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
|
||||
* HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
|
||||
*
|
||||
* Currently unsupported in KVM:
|
||||
* GUEST_IA32_RTIT_CTL = 0x00002814,
|
||||
*/
|
||||
#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
|
||||
PIN_BASED_VMX_PREEMPTION_TIMER)
|
||||
#define EVMCS1_UNSUPPORTED_2NDEXEC \
|
||||
(SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
|
||||
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
|
||||
SECONDARY_EXEC_APIC_REGISTER_VIRT | \
|
||||
SECONDARY_EXEC_ENABLE_PML | \
|
||||
SECONDARY_EXEC_ENABLE_VMFUNC | \
|
||||
SECONDARY_EXEC_SHADOW_VMCS | \
|
||||
SECONDARY_EXEC_TSC_SCALING | \
|
||||
SECONDARY_EXEC_PAUSE_LOOP_EXITING)
|
||||
#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
|
||||
#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
|
||||
#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
|
||||
|
||||
#if IS_ENABLED(CONFIG_HYPERV)
|
||||
|
||||
struct evmcs_field {
|
||||
u16 offset;
|
||||
u16 clean_field;
|
||||
};
|
||||
|
||||
extern const struct evmcs_field vmcs_field_to_evmcs_1[];
|
||||
extern const unsigned int nr_evmcs_1_fields;
|
||||
|
||||
#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
|
||||
|
||||
static __always_inline int get_evmcs_offset(unsigned long field,
|
||||
u16 *clean_field)
|
||||
{
|
||||
unsigned int index = ROL16(field, 6);
|
||||
const struct evmcs_field *evmcs_field;
|
||||
|
||||
if (unlikely(index >= nr_evmcs_1_fields)) {
|
||||
WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
|
||||
field);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
evmcs_field = &vmcs_field_to_evmcs_1[index];
|
||||
|
||||
if (clean_field)
|
||||
*clean_field = evmcs_field->clean_field;
|
||||
|
||||
return evmcs_field->offset;
|
||||
}
|
||||
|
||||
#undef ROL16
|
||||
|
||||
static inline void evmcs_write64(unsigned long field, u64 value)
|
||||
{
|
||||
u16 clean_field;
|
||||
int offset = get_evmcs_offset(field, &clean_field);
|
||||
|
||||
if (offset < 0)
|
||||
return;
|
||||
|
||||
*(u64 *)((char *)current_evmcs + offset) = value;
|
||||
|
||||
current_evmcs->hv_clean_fields &= ~clean_field;
|
||||
}
|
||||
|
||||
static inline void evmcs_write32(unsigned long field, u32 value)
|
||||
{
|
||||
u16 clean_field;
|
||||
int offset = get_evmcs_offset(field, &clean_field);
|
||||
|
||||
if (offset < 0)
|
||||
return;
|
||||
|
||||
*(u32 *)((char *)current_evmcs + offset) = value;
|
||||
current_evmcs->hv_clean_fields &= ~clean_field;
|
||||
}
|
||||
|
||||
static inline void evmcs_write16(unsigned long field, u16 value)
|
||||
{
|
||||
u16 clean_field;
|
||||
int offset = get_evmcs_offset(field, &clean_field);
|
||||
|
||||
if (offset < 0)
|
||||
return;
|
||||
|
||||
*(u16 *)((char *)current_evmcs + offset) = value;
|
||||
current_evmcs->hv_clean_fields &= ~clean_field;
|
||||
}
|
||||
|
||||
static inline u64 evmcs_read64(unsigned long field)
|
||||
{
|
||||
int offset = get_evmcs_offset(field, NULL);
|
||||
|
||||
if (offset < 0)
|
||||
return 0;
|
||||
|
||||
return *(u64 *)((char *)current_evmcs + offset);
|
||||
}
|
||||
|
||||
static inline u32 evmcs_read32(unsigned long field)
|
||||
{
|
||||
int offset = get_evmcs_offset(field, NULL);
|
||||
|
||||
if (offset < 0)
|
||||
return 0;
|
||||
|
||||
return *(u32 *)((char *)current_evmcs + offset);
|
||||
}
|
||||
|
||||
static inline u16 evmcs_read16(unsigned long field)
|
||||
{
|
||||
int offset = get_evmcs_offset(field, NULL);
|
||||
|
||||
if (offset < 0)
|
||||
return 0;
|
||||
|
||||
return *(u16 *)((char *)current_evmcs + offset);
|
||||
}
|
||||
|
||||
static inline void evmcs_touch_msr_bitmap(void)
|
||||
{
|
||||
if (unlikely(!current_evmcs))
|
||||
return;
|
||||
|
||||
if (current_evmcs->hv_enlightenments_control.msr_bitmap)
|
||||
current_evmcs->hv_clean_fields &=
|
||||
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
|
||||
}
|
||||
|
||||
static inline void evmcs_load(u64 phys_addr)
|
||||
{
|
||||
struct hv_vp_assist_page *vp_ap =
|
||||
hv_get_vp_assist_page(smp_processor_id());
|
||||
|
||||
vp_ap->current_nested_vmcs = phys_addr;
|
||||
vp_ap->enlighten_vmentry = 1;
|
||||
}
|
||||
|
||||
void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
|
||||
#else /* !IS_ENABLED(CONFIG_HYPERV) */
|
||||
static inline void evmcs_write64(unsigned long field, u64 value) {}
|
||||
static inline void evmcs_write32(unsigned long field, u32 value) {}
|
||||
static inline void evmcs_write16(unsigned long field, u16 value) {}
|
||||
static inline u64 evmcs_read64(unsigned long field) { return 0; }
|
||||
static inline u32 evmcs_read32(unsigned long field) { return 0; }
|
||||
static inline u16 evmcs_read16(unsigned long field) { return 0; }
|
||||
static inline void evmcs_load(u64 phys_addr) {}
|
||||
static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
|
||||
static inline void evmcs_touch_msr_bitmap(void) {}
|
||||
#endif /* IS_ENABLED(CONFIG_HYPERV) */
|
||||
|
||||
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
|
||||
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
|
||||
uint16_t *vmcs_version);
|
||||
|
||||
#endif /* __KVM_X86_VMX_EVMCS_H */
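get_evmcs_offset() above indexes vmcs_field_to_evmcs_1[] with ROL16(field, 6): rotating the 16-bit VMCS field encoding left by six moves the width/type bits into the low bits, so every supported encoding maps to a distinct slot within the bound checked against nr_evmcs_1_fields. A standalone sketch of the rotation, using field encodings from the SDM:

#include <stdint.h>
#include <stdio.h>

/* Same rotation as the ROL16() macro in evmcs.h / vmcs12.c. */
static uint16_t rol16(uint16_t val, unsigned int n)
{
	return (uint16_t)((val << n) | (val >> (16 - n)));
}

int main(void)
{
	uint16_t guest_rip = 0x681e;	/* natural-width guest-state field */
	uint16_t vpid      = 0x0000;	/* 16-bit control field            */

	/* The rotated value is the slot used in vmcs_field_to_evmcs_1[]. */
	printf("ROL16(GUEST_RIP, 6)            = %#x\n", rol16(guest_rip, 6));
	printf("ROL16(VIRTUAL_PROCESSOR_ID, 6) = %#x\n", rol16(vpid, 6));
	return 0;
}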
arch/x86/kvm/vmx/nested.c (new file, 5721 lines; diff not shown: file too large)

arch/x86/kvm/vmx/nested.h (new file, 282 lines)
@@ -0,0 +1,282 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __KVM_X86_VMX_NESTED_H
|
||||
#define __KVM_X86_VMX_NESTED_H
|
||||
|
||||
#include "kvm_cache_regs.h"
|
||||
#include "vmcs12.h"
|
||||
#include "vmx.h"
|
||||
|
||||
void vmx_leave_nested(struct kvm_vcpu *vcpu);
|
||||
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
|
||||
bool apicv);
|
||||
void nested_vmx_hardware_unsetup(void);
|
||||
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
|
||||
void nested_vmx_vcpu_setup(void);
|
||||
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
|
||||
int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
|
||||
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
|
||||
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
|
||||
u32 exit_intr_info, unsigned long exit_qualification);
|
||||
void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu);
|
||||
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
|
||||
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
|
||||
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
|
||||
u32 vmx_instruction_info, bool wr, gva_t *ret);
|
||||
|
||||
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_vmx(vcpu)->nested.cached_vmcs12;
|
||||
}
|
||||
|
||||
static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
|
||||
}
|
||||
|
||||
static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
/*
|
||||
* In case we do two consecutive get/set_nested_state()s while L2 was
|
||||
* running hv_evmcs may end up not being mapped (we map it from
|
||||
* nested_vmx_run()/vmx_vcpu_run()). Check is_guest_mode() as we always
|
||||
* have vmcs12 if it is true.
|
||||
*/
|
||||
return is_guest_mode(vcpu) || vmx->nested.current_vmptr != -1ull ||
|
||||
vmx->nested.hv_evmcs;
|
||||
}
|
||||
|
||||
static inline unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* return the page table to be shadowed - in our case, EPT12 */
|
||||
return get_vmcs12(vcpu)->ept_pointer;
|
||||
}
|
||||
|
||||
static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reflect a VM Exit into L1.
|
||||
*/
|
||||
static inline int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu,
|
||||
u32 exit_reason)
|
||||
{
|
||||
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
||||
|
||||
/*
|
||||
* At this point, the exit interruption info in exit_intr_info
|
||||
* is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT
|
||||
* we need to query the in-kernel LAPIC.
|
||||
*/
|
||||
WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT);
|
||||
if ((exit_intr_info &
|
||||
(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
|
||||
(INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
|
||||
vmcs12->vm_exit_intr_error_code =
|
||||
vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
|
||||
}
|
||||
|
||||
nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info,
|
||||
vmcs_readl(EXIT_QUALIFICATION));
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the cr0 value that a nested guest would read. This is a combination
|
||||
* of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
|
||||
* its hypervisor (cr0_read_shadow).
|
||||
*/
|
||||
static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
|
||||
{
|
||||
return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
|
||||
(fields->cr0_read_shadow & fields->cr0_guest_host_mask);
|
||||
}
|
||||
static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
|
||||
{
|
||||
return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
|
||||
(fields->cr4_read_shadow & fields->cr4_guest_host_mask);
|
||||
}
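The guest/host mask combination in nested_read_cr0() and nested_read_cr4() above is a plain bit select: for every CR bit L1 owns (mask bit set) the nested guest reads the read-shadow value, for every other bit it reads the register the hardware is really running with. A standalone worked example with made-up values:

#include <stdint.h>
#include <stdio.h>

/* Bit select: mask=1 -> take the shadow bit, mask=0 -> take the real bit. */
static uint64_t nested_read_cr(uint64_t guest_cr, uint64_t shadow,
			       uint64_t guest_host_mask)
{
	return (guest_cr & ~guest_host_mask) | (shadow & guest_host_mask);
}

int main(void)
{
	/* Hypothetical values: L1 owns (intercepts) CR0.PE and CR0.PG. */
	uint64_t mask       = (1ull << 0) | (1ull << 31);	/* PE | PG */
	uint64_t guest_cr0  = 0x80000031;	/* what hardware runs with     */
	uint64_t cr0_shadow = 0x00000030;	/* what L1 told L2 it wrote    */

	/* L2 sees PE/PG from the shadow, everything else from guest_cr0. */
	printf("effective CR0 = %#llx\n",
	       (unsigned long long)nested_read_cr(guest_cr0, cr0_shadow, mask));
	return 0;
}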
|
||||
|
||||
static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do the virtual VMX capability MSRs specify that L1 can use VMWRITE
|
||||
* to modify any valid field of the VMCS, or are the VM-exit
|
||||
* information fields read-only?
|
||||
*/
|
||||
static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_vmx(vcpu)->nested.msrs.misc_low &
|
||||
MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
|
||||
CPU_BASED_MONITOR_TRAP_FLAG;
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
|
||||
SECONDARY_EXEC_SHADOW_VMCS;
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
|
||||
{
|
||||
return vmcs12->cpu_based_vm_exec_control & bit;
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
|
||||
{
|
||||
return (vmcs12->cpu_based_vm_exec_control &
|
||||
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
|
||||
(vmcs12->secondary_vm_exec_control & bit);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return vmcs12->pin_based_vm_exec_control &
|
||||
PIN_BASED_VMX_PREEMPTION_TIMER;
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
|
||||
}
|
||||
|
||||
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has_vmfunc(vmcs12) &&
|
||||
(vmcs12->vm_function_control &
|
||||
VMX_VMFUNC_EPTP_SWITCHING);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS);
|
||||
}
|
||||
|
||||
static inline bool nested_cpu_has_save_preemption_timer(struct vmcs12 *vmcs12)
|
||||
{
|
||||
return vmcs12->vm_exit_controls &
|
||||
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
|
||||
}
|
||||
|
||||
/*
|
||||
* In nested virtualization, check if L1 asked to exit on external interrupts.
|
||||
* For most existing hypervisors, this will always return true.
|
||||
*/
|
||||
static inline bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return get_vmcs12(vcpu)->pin_based_vm_exec_control &
|
||||
PIN_BASED_EXT_INTR_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* if fixed0[i] == 1: val[i] must be 1
|
||||
* if fixed1[i] == 0: val[i] must be 0
|
||||
*/
|
||||
static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
|
||||
{
|
||||
return ((val & fixed1) | fixed0) == val;
|
||||
}
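fixed_bits_valid() above encodes the rule for the IA32_VMX_CR{0,4}_FIXED{0,1} MSRs: every bit set in FIXED0 must be 1 in the tested value, and every bit clear in FIXED1 must be 0. A small standalone check, using representative (not authoritative) CR0 fixed-bit values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same check as fixed_bits_valid(): ((val & fixed1) | fixed0) == val. */
static bool fixed_bits_valid(uint64_t val, uint64_t fixed0, uint64_t fixed1)
{
	return ((val & fixed1) | fixed0) == val;
}

int main(void)
{
	/*
	 * Representative CR0 fixed-bit MSRs: PE (bit 0), NE (bit 5) and
	 * PG (bit 31) must be 1; bits above 31 must be 0.
	 */
	uint64_t cr0_fixed0 = 0x80000021;
	uint64_t cr0_fixed1 = 0xffffffff;

	printf("%d\n", fixed_bits_valid(0x80000031, cr0_fixed0, cr0_fixed1)); /* 1: ok    */
	printf("%d\n", fixed_bits_valid(0x00000031, cr0_fixed0, cr0_fixed1)); /* 0: PG=0  */
	return 0;
}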
|
||||
|
||||
static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
|
||||
{
|
||||
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
|
||||
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
|
||||
if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
|
||||
SECONDARY_EXEC_UNRESTRICTED_GUEST &&
|
||||
nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
|
||||
fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
|
||||
|
||||
return fixed_bits_valid(val, fixed0, fixed1);
|
||||
}
|
||||
|
||||
static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
|
||||
{
|
||||
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
|
||||
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
|
||||
|
||||
return fixed_bits_valid(val, fixed0, fixed1);
|
||||
}
|
||||
|
||||
static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
|
||||
{
|
||||
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
|
||||
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
|
||||
|
||||
return fixed_bits_valid(val, fixed0, fixed1);
|
||||
}
|
||||
|
||||
/* No difference in the restrictions on guest and host CR4 in VMX operation. */
|
||||
#define nested_guest_cr4_valid nested_cr4_valid
|
||||
#define nested_host_cr4_valid nested_cr4_valid
|
||||
|
||||
#endif /* __KVM_X86_VMX_NESTED_H */
arch/x86/kvm/vmx/ops.h (new file, 285 lines)
@@ -0,0 +1,285 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __KVM_X86_VMX_INSN_H
|
||||
#define __KVM_X86_VMX_INSN_H
|
||||
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/vmx.h>
|
||||
|
||||
#include "evmcs.h"
|
||||
#include "vmcs.h"
|
||||
|
||||
#define __ex(x) __kvm_handle_fault_on_reboot(x)
|
||||
#define __ex_clear(x, reg) \
|
||||
____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)
|
||||
|
||||
static __always_inline void vmcs_check16(unsigned long field)
|
||||
{
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
|
||||
"16-bit accessor invalid for 64-bit field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
|
||||
"16-bit accessor invalid for 64-bit high field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
|
||||
"16-bit accessor invalid for 32-bit high field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
|
||||
"16-bit accessor invalid for natural width field");
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_check32(unsigned long field)
|
||||
{
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
|
||||
"32-bit accessor invalid for 16-bit field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
|
||||
"32-bit accessor invalid for natural width field");
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_check64(unsigned long field)
|
||||
{
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
|
||||
"64-bit accessor invalid for 16-bit field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
|
||||
"64-bit accessor invalid for 64-bit high field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
|
||||
"64-bit accessor invalid for 32-bit field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
|
||||
"64-bit accessor invalid for natural width field");
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_checkl(unsigned long field)
|
||||
{
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
|
||||
"Natural width accessor invalid for 16-bit field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
|
||||
"Natural width accessor invalid for 64-bit field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
|
||||
"Natural width accessor invalid for 64-bit high field");
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
|
||||
"Natural width accessor invalid for 32-bit field");
|
||||
}
|
||||
|
||||
static __always_inline unsigned long __vmcs_readl(unsigned long field)
|
||||
{
|
||||
unsigned long value;
|
||||
|
||||
asm volatile (__ex_clear("vmread %1, %0", "%k0")
|
||||
: "=r"(value) : "r"(field));
|
||||
return value;
|
||||
}
|
||||
|
||||
static __always_inline u16 vmcs_read16(unsigned long field)
|
||||
{
|
||||
vmcs_check16(field);
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_read16(field);
|
||||
return __vmcs_readl(field);
|
||||
}
|
||||
|
||||
static __always_inline u32 vmcs_read32(unsigned long field)
|
||||
{
|
||||
vmcs_check32(field);
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_read32(field);
|
||||
return __vmcs_readl(field);
|
||||
}
|
||||
|
||||
static __always_inline u64 vmcs_read64(unsigned long field)
|
||||
{
|
||||
vmcs_check64(field);
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_read64(field);
|
||||
#ifdef CONFIG_X86_64
|
||||
return __vmcs_readl(field);
|
||||
#else
|
||||
return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32);
|
||||
#endif
|
||||
}
|
||||
|
||||
static __always_inline unsigned long vmcs_readl(unsigned long field)
|
||||
{
|
||||
vmcs_checkl(field);
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_read64(field);
|
||||
return __vmcs_readl(field);
|
||||
}
|
||||
|
||||
static noinline void vmwrite_error(unsigned long field, unsigned long value)
|
||||
{
|
||||
printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
|
||||
field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
|
||||
{
|
||||
bool error;
|
||||
|
||||
asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
|
||||
: CC_OUT(na) (error) : "r"(field), "rm"(value));
|
||||
if (unlikely(error))
|
||||
vmwrite_error(field, value);
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_write16(unsigned long field, u16 value)
|
||||
{
|
||||
vmcs_check16(field);
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_write16(field, value);
|
||||
|
||||
__vmcs_writel(field, value);
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_write32(unsigned long field, u32 value)
|
||||
{
|
||||
vmcs_check32(field);
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_write32(field, value);
|
||||
|
||||
__vmcs_writel(field, value);
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_write64(unsigned long field, u64 value)
|
||||
{
|
||||
vmcs_check64(field);
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_write64(field, value);
|
||||
|
||||
__vmcs_writel(field, value);
|
||||
#ifndef CONFIG_X86_64
|
||||
asm volatile ("");
|
||||
__vmcs_writel(field+1, value >> 32);
|
||||
#endif
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
|
||||
{
|
||||
vmcs_checkl(field);
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_write64(field, value);
|
||||
|
||||
__vmcs_writel(field, value);
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
|
||||
{
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
|
||||
"vmcs_clear_bits does not support 64-bit fields");
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_write32(field, evmcs_read32(field) & ~mask);
|
||||
|
||||
__vmcs_writel(field, __vmcs_readl(field) & ~mask);
|
||||
}
|
||||
|
||||
static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
|
||||
{
|
||||
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
|
||||
"vmcs_set_bits does not support 64-bit fields");
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_write32(field, evmcs_read32(field) | mask);
|
||||
|
||||
__vmcs_writel(field, __vmcs_readl(field) | mask);
|
||||
}
|
||||
|
||||
static inline void vmcs_clear(struct vmcs *vmcs)
|
||||
{
|
||||
u64 phys_addr = __pa(vmcs);
|
||||
bool error;
|
||||
|
||||
asm volatile (__ex("vmclear %1") CC_SET(na)
|
||||
: CC_OUT(na) (error) : "m"(phys_addr));
|
||||
if (unlikely(error))
|
||||
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
|
||||
vmcs, phys_addr);
|
||||
}
|
||||
|
||||
static inline void vmcs_load(struct vmcs *vmcs)
|
||||
{
|
||||
u64 phys_addr = __pa(vmcs);
|
||||
bool error;
|
||||
|
||||
if (static_branch_unlikely(&enable_evmcs))
|
||||
return evmcs_load(phys_addr);
|
||||
|
||||
asm volatile (__ex("vmptrld %1") CC_SET(na)
|
||||
: CC_OUT(na) (error) : "m"(phys_addr));
|
||||
if (unlikely(error))
|
||||
printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
|
||||
vmcs, phys_addr);
|
||||
}
|
||||
|
||||
static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
|
||||
{
|
||||
struct {
|
||||
u64 vpid : 16;
|
||||
u64 rsvd : 48;
|
||||
u64 gva;
|
||||
} operand = { vpid, 0, gva };
|
||||
bool error;
|
||||
|
||||
asm volatile (__ex("invvpid %2, %1") CC_SET(na)
|
||||
: CC_OUT(na) (error) : "r"(ext), "m"(operand));
|
||||
BUG_ON(error);
|
||||
}
|
||||
|
||||
static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
|
||||
{
|
||||
struct {
|
||||
u64 eptp, gpa;
|
||||
} operand = {eptp, gpa};
|
||||
bool error;
|
||||
|
||||
asm volatile (__ex("invept %2, %1") CC_SET(na)
|
||||
: CC_OUT(na) (error) : "r"(ext), "m"(operand));
|
||||
BUG_ON(error);
|
||||
}
|
||||
|
||||
static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
|
||||
{
|
||||
if (vpid == 0)
|
||||
return true;
|
||||
|
||||
if (cpu_has_vmx_invvpid_individual_addr()) {
|
||||
__invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void vpid_sync_vcpu_single(int vpid)
|
||||
{
|
||||
if (vpid == 0)
|
||||
return;
|
||||
|
||||
if (cpu_has_vmx_invvpid_single())
|
||||
__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
|
||||
}
|
||||
|
||||
static inline void vpid_sync_vcpu_global(void)
|
||||
{
|
||||
if (cpu_has_vmx_invvpid_global())
|
||||
__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
|
||||
}
|
||||
|
||||
static inline void vpid_sync_context(int vpid)
|
||||
{
|
||||
if (cpu_has_vmx_invvpid_single())
|
||||
vpid_sync_vcpu_single(vpid);
|
||||
else
|
||||
vpid_sync_vcpu_global();
|
||||
}
|
||||
|
||||
static inline void ept_sync_global(void)
|
||||
{
|
||||
__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
|
||||
}
|
||||
|
||||
static inline void ept_sync_context(u64 eptp)
|
||||
{
|
||||
if (cpu_has_vmx_invept_context())
|
||||
__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
|
||||
else
|
||||
ept_sync_global();
|
||||
}
|
||||
|
||||
#endif /* __KVM_X86_VMX_INSN_H */
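On 32-bit kernels, vmcs_read64() and vmcs_write64() above fall back to two natural-width accesses, relying on the convention that a 64-bit VMCS field's high half lives at encoding field + 1. The composition itself is ordinary bit arithmetic; a standalone sketch (the two-element array only stands in for the two hardware accesses):

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_vmcs[2];	/* [0] = "field", [1] = "field + 1" */

static void write64_split(uint64_t value)
{
	fake_vmcs[0] = (uint32_t)value;		/* vmwrite(field, low)       */
	fake_vmcs[1] = (uint32_t)(value >> 32);	/* vmwrite(field + 1, high)  */
}

static uint64_t read64_split(void)
{
	return (uint64_t)fake_vmcs[0] | ((uint64_t)fake_vmcs[1] << 32);
}

int main(void)
{
	write64_split(0x1122334455667788ull);
	printf("%#llx\n", (unsigned long long)read64_split());	/* round-trips */
	return 0;
}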
arch/x86/kvm/vmx/vmcs.h (new file, 136 lines)
@@ -0,0 +1,136 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __KVM_X86_VMX_VMCS_H
|
||||
#define __KVM_X86_VMX_VMCS_H
|
||||
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include <asm/kvm.h>
|
||||
#include <asm/vmx.h>
|
||||
|
||||
#include "capabilities.h"
|
||||
|
||||
struct vmcs_hdr {
|
||||
u32 revision_id:31;
|
||||
u32 shadow_vmcs:1;
|
||||
};
|
||||
|
||||
struct vmcs {
|
||||
struct vmcs_hdr hdr;
|
||||
u32 abort;
|
||||
char data[0];
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(struct vmcs *, current_vmcs);
|
||||
|
||||
/*
|
||||
* vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
|
||||
* and whose values change infrequently, but are not constant. I.e. this is
|
||||
* used as a write-through cache of the corresponding VMCS fields.
|
||||
*/
|
||||
struct vmcs_host_state {
|
||||
unsigned long cr3; /* May not match real cr3 */
|
||||
unsigned long cr4; /* May not match real cr4 */
|
||||
unsigned long gs_base;
|
||||
unsigned long fs_base;
|
||||
|
||||
u16 fs_sel, gs_sel, ldt_sel;
|
||||
#ifdef CONFIG_X86_64
|
||||
u16 ds_sel, es_sel;
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
|
||||
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
|
||||
* loaded on this CPU (so we can clear them if the CPU goes down).
|
||||
*/
|
||||
struct loaded_vmcs {
|
||||
struct vmcs *vmcs;
|
||||
struct vmcs *shadow_vmcs;
|
||||
int cpu;
|
||||
bool launched;
|
||||
bool nmi_known_unmasked;
|
||||
bool hv_timer_armed;
|
||||
/* Support for vnmi-less CPUs */
|
||||
int soft_vnmi_blocked;
|
||||
ktime_t entry_time;
|
||||
s64 vnmi_blocked_time;
|
||||
unsigned long *msr_bitmap;
|
||||
struct list_head loaded_vmcss_on_cpu_link;
|
||||
struct vmcs_host_state host_state;
|
||||
};
|
||||
|
||||
static inline bool is_exception_n(u32 intr_info, u8 vector)
|
||||
{
|
||||
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
|
||||
INTR_INFO_VALID_MASK)) ==
|
||||
(INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
|
||||
}
|
||||
|
||||
static inline bool is_debug(u32 intr_info)
|
||||
{
|
||||
return is_exception_n(intr_info, DB_VECTOR);
|
||||
}
|
||||
|
||||
static inline bool is_breakpoint(u32 intr_info)
|
||||
{
|
||||
return is_exception_n(intr_info, BP_VECTOR);
|
||||
}
|
||||
|
||||
static inline bool is_page_fault(u32 intr_info)
|
||||
{
|
||||
return is_exception_n(intr_info, PF_VECTOR);
|
||||
}
|
||||
|
||||
static inline bool is_invalid_opcode(u32 intr_info)
|
||||
{
|
||||
return is_exception_n(intr_info, UD_VECTOR);
|
||||
}
|
||||
|
||||
static inline bool is_gp_fault(u32 intr_info)
|
||||
{
|
||||
return is_exception_n(intr_info, GP_VECTOR);
|
||||
}
|
||||
|
||||
static inline bool is_machine_check(u32 intr_info)
|
||||
{
|
||||
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
|
||||
INTR_INFO_VALID_MASK)) ==
|
||||
(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
|
||||
}
|
||||
|
||||
/* Undocumented: icebp/int1 */
|
||||
static inline bool is_icebp(u32 intr_info)
|
||||
{
|
||||
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
|
||||
== (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
|
||||
}
|
||||
|
||||
static inline bool is_nmi(u32 intr_info)
|
||||
{
|
||||
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
|
||||
== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
|
||||
}
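The is_*() predicates above all match against the VM-exit interruption-information format: vector in bits 7:0, interruption type in bits 10:8, valid in bit 31. A standalone sketch of the same test; the constants below are local stand-ins for the asm/vmx.h definitions, not the kernel's headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INTR_INFO_VECTOR_MASK     0x000000ffu	/* bits 7:0  */
#define INTR_INFO_INTR_TYPE_MASK  0x00000700u	/* bits 10:8 */
#define INTR_INFO_VALID_MASK      0x80000000u	/* bit 31    */
#define INTR_TYPE_HARD_EXCEPTION  (3u << 8)
#define PF_VECTOR                 14

static bool is_exception_n(uint32_t intr_info, uint8_t vector)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
	       (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
}

int main(void)
{
	/* A valid hardware-exception event for #PF (vector 14). */
	uint32_t intr_info = INTR_INFO_VALID_MASK | INTR_TYPE_HARD_EXCEPTION |
			     PF_VECTOR;

	printf("is_page_fault: %d\n", is_exception_n(intr_info, PF_VECTOR)); /* 1 */
	return 0;
}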
|
||||
|
||||
enum vmcs_field_width {
|
||||
VMCS_FIELD_WIDTH_U16 = 0,
|
||||
VMCS_FIELD_WIDTH_U64 = 1,
|
||||
VMCS_FIELD_WIDTH_U32 = 2,
|
||||
VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
|
||||
};
|
||||
|
||||
static inline int vmcs_field_width(unsigned long field)
|
||||
{
|
||||
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
|
||||
return VMCS_FIELD_WIDTH_U32;
|
||||
return (field >> 13) & 0x3;
|
||||
}
|
||||
|
||||
static inline int vmcs_field_readonly(unsigned long field)
|
||||
{
|
||||
return (((field >> 10) & 0x3) == 1);
|
||||
}
|
||||
|
||||
#endif /* __KVM_X86_VMX_VMCS_H */
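vmcs_field_width() and vmcs_field_readonly() above decode the standard VMCS field-encoding layout: bit 0 selects the high half of a 64-bit field, bits 11:10 are the field type (type 1 = read-only VM-exit information), and bits 14:13 are the width. A standalone decode of two encodings taken from the SDM:

#include <stdio.h>

enum { WIDTH_U16, WIDTH_U64, WIDTH_U32, WIDTH_NATURAL };

/* Same decode as vmcs_field_width()/vmcs_field_readonly() above. */
static int field_width(unsigned long field)
{
	if (field & 0x1)		/* the *_HIGH fields are all 32 bit */
		return WIDTH_U32;
	return (field >> 13) & 0x3;
}

static int field_readonly(unsigned long field)
{
	return ((field >> 10) & 0x3) == 1;
}

int main(void)
{
	printf("GUEST_RIP   0x681e: width %d, ro %d\n",
	       field_width(0x681e), field_readonly(0x681e)); /* natural, rw      */
	printf("EXIT_REASON 0x4402: width %d, ro %d\n",
	       field_width(0x4402), field_readonly(0x4402)); /* u32, read-only   */
	return 0;
}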
arch/x86/kvm/vmx/vmcs12.c (new file, 157 lines)
@@ -0,0 +1,157 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "vmcs12.h"
|
||||
|
||||
#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
|
||||
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
|
||||
#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name)
|
||||
#define FIELD64(number, name) \
|
||||
FIELD(number, name), \
|
||||
[ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
|
||||
|
||||
const unsigned short vmcs_field_to_offset_table[] = {
|
||||
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
|
||||
FIELD(POSTED_INTR_NV, posted_intr_nv),
|
||||
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
|
||||
FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
|
||||
FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
|
||||
FIELD(GUEST_DS_SELECTOR, guest_ds_selector),
|
||||
FIELD(GUEST_FS_SELECTOR, guest_fs_selector),
|
||||
FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
|
||||
FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
|
||||
FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
|
||||
FIELD(GUEST_INTR_STATUS, guest_intr_status),
|
||||
FIELD(GUEST_PML_INDEX, guest_pml_index),
|
||||
FIELD(HOST_ES_SELECTOR, host_es_selector),
|
||||
FIELD(HOST_CS_SELECTOR, host_cs_selector),
|
||||
FIELD(HOST_SS_SELECTOR, host_ss_selector),
|
||||
FIELD(HOST_DS_SELECTOR, host_ds_selector),
|
||||
FIELD(HOST_FS_SELECTOR, host_fs_selector),
|
||||
FIELD(HOST_GS_SELECTOR, host_gs_selector),
|
||||
FIELD(HOST_TR_SELECTOR, host_tr_selector),
|
||||
FIELD64(IO_BITMAP_A, io_bitmap_a),
|
||||
FIELD64(IO_BITMAP_B, io_bitmap_b),
|
||||
FIELD64(MSR_BITMAP, msr_bitmap),
|
||||
FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
|
||||
FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
|
||||
FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
|
||||
FIELD64(PML_ADDRESS, pml_address),
|
||||
FIELD64(TSC_OFFSET, tsc_offset),
|
||||
FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
|
||||
FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
|
||||
FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
|
||||
FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
|
||||
FIELD64(EPT_POINTER, ept_pointer),
|
||||
FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
|
||||
FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
|
||||
FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
|
||||
FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
|
||||
FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
|
||||
FIELD64(VMREAD_BITMAP, vmread_bitmap),
|
||||
FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
|
||||
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
|
||||
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
|
||||
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
|
||||
FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
|
||||
FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
|
||||
FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
|
||||
FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
|
||||
FIELD64(GUEST_PDPTR0, guest_pdptr0),
|
||||
FIELD64(GUEST_PDPTR1, guest_pdptr1),
|
||||
FIELD64(GUEST_PDPTR2, guest_pdptr2),
|
||||
FIELD64(GUEST_PDPTR3, guest_pdptr3),
|
||||
FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
|
||||
FIELD64(HOST_IA32_PAT, host_ia32_pat),
|
||||
FIELD64(HOST_IA32_EFER, host_ia32_efer),
|
||||
FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
|
||||
FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
|
||||
FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
|
||||
FIELD(EXCEPTION_BITMAP, exception_bitmap),
|
||||
FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask),
|
||||
FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match),
|
||||
FIELD(CR3_TARGET_COUNT, cr3_target_count),
|
||||
FIELD(VM_EXIT_CONTROLS, vm_exit_controls),
|
||||
FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count),
|
||||
FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count),
|
||||
FIELD(VM_ENTRY_CONTROLS, vm_entry_controls),
|
||||
FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
|
||||
FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
|
||||
FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
|
||||
FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
|
||||
FIELD(TPR_THRESHOLD, tpr_threshold),
|
||||
FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
|
||||
FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error),
|
||||
FIELD(VM_EXIT_REASON, vm_exit_reason),
|
||||
FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info),
|
||||
FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
|
||||
FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
|
||||
FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
|
||||
FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
|
||||
FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
|
||||
FIELD(GUEST_ES_LIMIT, guest_es_limit),
|
||||
FIELD(GUEST_CS_LIMIT, guest_cs_limit),
|
||||
FIELD(GUEST_SS_LIMIT, guest_ss_limit),
|
||||
FIELD(GUEST_DS_LIMIT, guest_ds_limit),
|
||||
FIELD(GUEST_FS_LIMIT, guest_fs_limit),
|
||||
FIELD(GUEST_GS_LIMIT, guest_gs_limit),
|
||||
FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit),
|
||||
FIELD(GUEST_TR_LIMIT, guest_tr_limit),
|
||||
FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit),
|
||||
FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit),
|
||||
FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes),
|
||||
FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes),
|
||||
FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes),
|
||||
FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes),
|
||||
FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes),
|
||||
FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes),
|
||||
FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes),
|
||||
FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes),
|
||||
FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info),
|
||||
FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
|
||||
FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
|
||||
FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
|
||||
FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
|
||||
FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
|
||||
FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
|
||||
FIELD(CR0_READ_SHADOW, cr0_read_shadow),
|
||||
FIELD(CR4_READ_SHADOW, cr4_read_shadow),
|
||||
FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
|
||||
FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
|
||||
FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
|
||||
FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
|
||||
FIELD(EXIT_QUALIFICATION, exit_qualification),
|
||||
FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),
|
||||
FIELD(GUEST_CR0, guest_cr0),
|
||||
FIELD(GUEST_CR3, guest_cr3),
|
||||
FIELD(GUEST_CR4, guest_cr4),
|
||||
FIELD(GUEST_ES_BASE, guest_es_base),
|
||||
FIELD(GUEST_CS_BASE, guest_cs_base),
|
||||
FIELD(GUEST_SS_BASE, guest_ss_base),
|
||||
FIELD(GUEST_DS_BASE, guest_ds_base),
|
||||
FIELD(GUEST_FS_BASE, guest_fs_base),
|
||||
FIELD(GUEST_GS_BASE, guest_gs_base),
|
||||
FIELD(GUEST_LDTR_BASE, guest_ldtr_base),
|
||||
FIELD(GUEST_TR_BASE, guest_tr_base),
|
||||
FIELD(GUEST_GDTR_BASE, guest_gdtr_base),
|
||||
FIELD(GUEST_IDTR_BASE, guest_idtr_base),
|
||||
FIELD(GUEST_DR7, guest_dr7),
|
||||
FIELD(GUEST_RSP, guest_rsp),
|
||||
FIELD(GUEST_RIP, guest_rip),
|
||||
FIELD(GUEST_RFLAGS, guest_rflags),
|
||||
FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
|
||||
FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
|
||||
FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
|
||||
FIELD(HOST_CR0, host_cr0),
|
||||
FIELD(HOST_CR3, host_cr3),
|
||||
FIELD(HOST_CR4, host_cr4),
|
||||
FIELD(HOST_FS_BASE, host_fs_base),
|
||||
FIELD(HOST_GS_BASE, host_gs_base),
|
||||
FIELD(HOST_TR_BASE, host_tr_base),
|
||||
FIELD(HOST_GDTR_BASE, host_gdtr_base),
|
||||
FIELD(HOST_IDTR_BASE, host_idtr_base),
|
||||
FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp),
|
||||
FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
|
||||
FIELD(HOST_RSP, host_rsp),
|
||||
FIELD(HOST_RIP, host_rip),
|
||||
};
|
||||
const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs_field_to_offset_table);
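The FIELD64() macro above emits two table entries per 64-bit field: one at ROL16(encoding, 6) for the full member, and one at ROL16(encoding_HIGH, 6), i.e. encoding + 1, pointing sizeof(u32) bytes into the same vmcs12 member, so a 32-bit access to the _HIGH half lands on the upper word. A standalone sketch of that offset arithmetic, with a hypothetical miniature struct (not the real vmcs12 layout):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical miniature vmcs12 with a single 64-bit field. */
struct mini_vmcs12 {
	uint32_t launch_state;
	uint64_t io_bitmap_a;
};

static uint16_t rol16(uint16_t val, unsigned int n)
{
	return (uint16_t)((val << n) | (val >> (16 - n)));
}

int main(void)
{
	uint16_t io_bitmap_a      = 0x2000;		/* SDM encoding        */
	uint16_t io_bitmap_a_high = io_bitmap_a + 1;	/* its _HIGH companion */

	/* FIELD64() style: full field at offsetof(), _HIGH half 4 bytes later. */
	printf("slot %#x -> offset %zu\n", rol16(io_bitmap_a, 6),
	       offsetof(struct mini_vmcs12, io_bitmap_a));
	printf("slot %#x -> offset %zu\n", rol16(io_bitmap_a_high, 6),
	       offsetof(struct mini_vmcs12, io_bitmap_a) + sizeof(uint32_t));
	return 0;
}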
arch/x86/kvm/vmx/vmcs12.h (new file, 462 lines)
@@ -0,0 +1,462 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __KVM_X86_VMX_VMCS12_H
|
||||
#define __KVM_X86_VMX_VMCS12_H
|
||||
|
||||
#include <linux/build_bug.h>
|
||||
|
||||
#include "vmcs.h"
|
||||
|
||||
/*
|
||||
* struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
|
||||
* single nested guest (L2), hence the name vmcs12. Any VMX implementation has
|
||||
* a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
|
||||
* stored in guest memory specified by VMPTRLD, but is opaque to the guest,
|
||||
* which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
|
||||
* More than one of these structures may exist, if L1 runs multiple L2 guests.
|
||||
* nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
|
||||
* underlying hardware which will be used to run L2.
|
||||
* This structure is packed to ensure that its layout is identical across
|
||||
* machines (necessary for live migration).
|
||||
*
|
||||
* IMPORTANT: Changing the layout of existing fields in this structure
|
||||
* will break save/restore compatibility with older kvm releases. When
|
||||
* adding new fields, either use space in the reserved padding* arrays
|
||||
* or add the new fields to the end of the structure.
|
||||
*/
|
||||
typedef u64 natural_width;
|
||||
struct __packed vmcs12 {
|
||||
/* According to the Intel spec, a VMCS region must start with the
|
||||
* following two fields. Then follow implementation-specific data.
|
||||
*/
|
||||
struct vmcs_hdr hdr;
|
||||
u32 abort;
|
||||
|
||||
u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
|
||||
u32 padding[7]; /* room for future expansion */
|
||||
|
||||
u64 io_bitmap_a;
|
||||
u64 io_bitmap_b;
|
||||
u64 msr_bitmap;
|
||||
u64 vm_exit_msr_store_addr;
|
||||
u64 vm_exit_msr_load_addr;
|
||||
u64 vm_entry_msr_load_addr;
|
||||
u64 tsc_offset;
|
||||
u64 virtual_apic_page_addr;
|
||||
u64 apic_access_addr;
|
||||
u64 posted_intr_desc_addr;
|
||||
u64 ept_pointer;
|
||||
u64 eoi_exit_bitmap0;
|
||||
u64 eoi_exit_bitmap1;
|
||||
u64 eoi_exit_bitmap2;
|
||||
u64 eoi_exit_bitmap3;
|
||||
u64 xss_exit_bitmap;
|
||||
u64 guest_physical_address;
|
||||
u64 vmcs_link_pointer;
|
||||
u64 guest_ia32_debugctl;
|
||||
u64 guest_ia32_pat;
|
||||
u64 guest_ia32_efer;
|
||||
u64 guest_ia32_perf_global_ctrl;
|
||||
u64 guest_pdptr0;
|
||||
u64 guest_pdptr1;
|
||||
u64 guest_pdptr2;
|
||||
u64 guest_pdptr3;
|
||||
u64 guest_bndcfgs;
|
||||
u64 host_ia32_pat;
|
||||
u64 host_ia32_efer;
|
||||
u64 host_ia32_perf_global_ctrl;
|
||||
u64 vmread_bitmap;
|
||||
u64 vmwrite_bitmap;
|
||||
u64 vm_function_control;
|
||||
u64 eptp_list_address;
|
||||
u64 pml_address;
|
||||
u64 padding64[3]; /* room for future expansion */
|
||||
/*
|
||||
* To allow migration of L1 (complete with its L2 guests) between
|
||||
* machines of different natural widths (32 or 64 bit), we cannot have
|
||||
* unsigned long fields with no explicit size. We use u64 (aliased
|
||||
* natural_width) instead. Luckily, x86 is little-endian.
|
||||
*/
|
||||
natural_width cr0_guest_host_mask;
|
||||
natural_width cr4_guest_host_mask;
|
||||
natural_width cr0_read_shadow;
|
||||
natural_width cr4_read_shadow;
|
||||
natural_width cr3_target_value0;
|
||||
natural_width cr3_target_value1;
|
||||
natural_width cr3_target_value2;
|
||||
natural_width cr3_target_value3;
|
||||
natural_width exit_qualification;
|
||||
natural_width guest_linear_address;
|
||||
natural_width guest_cr0;
|
||||
natural_width guest_cr3;
|
||||
natural_width guest_cr4;
|
||||
natural_width guest_es_base;
|
||||
natural_width guest_cs_base;
|
||||
natural_width guest_ss_base;
|
||||
natural_width guest_ds_base;
|
||||
natural_width guest_fs_base;
|
||||
natural_width guest_gs_base;
|
||||
natural_width guest_ldtr_base;
|
||||
natural_width guest_tr_base;
|
||||
natural_width guest_gdtr_base;
|
||||
natural_width guest_idtr_base;
|
||||
natural_width guest_dr7;
|
||||
natural_width guest_rsp;
|
||||
natural_width guest_rip;
|
||||
natural_width guest_rflags;
|
||||
natural_width guest_pending_dbg_exceptions;
|
||||
natural_width guest_sysenter_esp;
|
||||
natural_width guest_sysenter_eip;
|
||||
natural_width host_cr0;
|
||||
natural_width host_cr3;
|
||||
natural_width host_cr4;
|
||||
natural_width host_fs_base;
|
||||
natural_width host_gs_base;
|
||||
natural_width host_tr_base;
|
||||
natural_width host_gdtr_base;
|
||||
natural_width host_idtr_base;
|
||||
natural_width host_ia32_sysenter_esp;
|
||||
natural_width host_ia32_sysenter_eip;
|
||||
natural_width host_rsp;
|
||||
natural_width host_rip;
|
||||
natural_width paddingl[8]; /* room for future expansion */
|
||||
u32 pin_based_vm_exec_control;
|
||||
u32 cpu_based_vm_exec_control;
|
||||
u32 exception_bitmap;
|
||||
u32 page_fault_error_code_mask;
|
||||
u32 page_fault_error_code_match;
|
||||
u32 cr3_target_count;
|
||||
u32 vm_exit_controls;
|
||||
u32 vm_exit_msr_store_count;
|
||||
u32 vm_exit_msr_load_count;
|
||||
u32 vm_entry_controls;
|
||||
u32 vm_entry_msr_load_count;
|
||||
u32 vm_entry_intr_info_field;
|
||||
u32 vm_entry_exception_error_code;
|
||||
u32 vm_entry_instruction_len;
|
||||
u32 tpr_threshold;
|
||||
u32 secondary_vm_exec_control;
|
||||
u32 vm_instruction_error;
|
||||
u32 vm_exit_reason;
|
||||
u32 vm_exit_intr_info;
|
||||
u32 vm_exit_intr_error_code;
|
||||
u32 idt_vectoring_info_field;
|
||||
u32 idt_vectoring_error_code;
|
||||
u32 vm_exit_instruction_len;
|
||||
u32 vmx_instruction_info;
|
||||
u32 guest_es_limit;
|
||||
u32 guest_cs_limit;
|
||||
u32 guest_ss_limit;
|
||||
u32 guest_ds_limit;
|
||||
u32 guest_fs_limit;
|
||||
u32 guest_gs_limit;
|
||||
u32 guest_ldtr_limit;
|
||||
u32 guest_tr_limit;
|
||||
u32 guest_gdtr_limit;
|
||||
u32 guest_idtr_limit;
|
||||
u32 guest_es_ar_bytes;
|
||||
u32 guest_cs_ar_bytes;
|
||||
u32 guest_ss_ar_bytes;
|
||||
u32 guest_ds_ar_bytes;
|
||||
u32 guest_fs_ar_bytes;
|
||||
u32 guest_gs_ar_bytes;
|
||||
u32 guest_ldtr_ar_bytes;
|
||||
u32 guest_tr_ar_bytes;
|
||||
u32 guest_interruptibility_info;
|
||||
u32 guest_activity_state;
|
||||
u32 guest_sysenter_cs;
|
||||
u32 host_ia32_sysenter_cs;
|
||||
u32 vmx_preemption_timer_value;
|
||||
u32 padding32[7]; /* room for future expansion */
|
||||
u16 virtual_processor_id;
|
||||
u16 posted_intr_nv;
|
||||
u16 guest_es_selector;
|
||||
u16 guest_cs_selector;
|
||||
u16 guest_ss_selector;
|
||||
u16 guest_ds_selector;
|
||||
u16 guest_fs_selector;
|
||||
u16 guest_gs_selector;
|
||||
u16 guest_ldtr_selector;
|
||||
u16 guest_tr_selector;
|
||||
u16 guest_intr_status;
|
||||
u16 host_es_selector;
|
||||
u16 host_cs_selector;
|
||||
u16 host_ss_selector;
|
||||
u16 host_ds_selector;
|
||||
u16 host_fs_selector;
|
||||
u16 host_gs_selector;
|
||||
u16 host_tr_selector;
|
||||
u16 guest_pml_index;
|
||||
};
|
||||
|
||||
/*
|
||||
* VMCS12_REVISION is an arbitrary id that should be changed if the content or
|
||||
* layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
|
||||
* VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
|
||||
*
|
||||
* IMPORTANT: Changing this value will break save/restore compatibility with
|
||||
* older kvm releases.
|
||||
*/
|
||||
#define VMCS12_REVISION 0x11e57ed0
|
||||
|
||||
/*
|
||||
* VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
|
||||
* and any VMCS region. Although only sizeof(struct vmcs12) are used by the
|
||||
* current implementation, 4K are reserved to avoid future complications.
|
||||
*/
|
||||
#define VMCS12_SIZE 0x1000
|
||||
|
||||
/*
|
||||
* VMCS12_MAX_FIELD_INDEX is the highest index value used in any
|
||||
* supported VMCS12 field encoding.
|
||||
*/
|
||||
#define VMCS12_MAX_FIELD_INDEX 0x17
|
||||
|
||||
/*
|
||||
* For save/restore compatibility, the vmcs12 field offsets must not change.
|
||||
*/
|
||||
#define CHECK_OFFSET(field, loc) \
|
||||
BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \
|
||||
"Offset of " #field " in struct vmcs12 has changed.")
|
||||
|
||||
static inline void vmx_check_vmcs12_offsets(void)
|
||||
{
|
||||
CHECK_OFFSET(hdr, 0);
|
||||
CHECK_OFFSET(abort, 4);
|
||||
CHECK_OFFSET(launch_state, 8);
|
||||
CHECK_OFFSET(io_bitmap_a, 40);
|
||||
CHECK_OFFSET(io_bitmap_b, 48);
|
||||
CHECK_OFFSET(msr_bitmap, 56);
|
||||
CHECK_OFFSET(vm_exit_msr_store_addr, 64);
|
||||
CHECK_OFFSET(vm_exit_msr_load_addr, 72);
|
||||
CHECK_OFFSET(vm_entry_msr_load_addr, 80);
|
||||
CHECK_OFFSET(tsc_offset, 88);
|
||||
CHECK_OFFSET(virtual_apic_page_addr, 96);
|
||||
CHECK_OFFSET(apic_access_addr, 104);
|
||||
CHECK_OFFSET(posted_intr_desc_addr, 112);
|
||||
CHECK_OFFSET(ept_pointer, 120);
|
||||
CHECK_OFFSET(eoi_exit_bitmap0, 128);
|
||||
CHECK_OFFSET(eoi_exit_bitmap1, 136);
|
||||
CHECK_OFFSET(eoi_exit_bitmap2, 144);
|
||||
CHECK_OFFSET(eoi_exit_bitmap3, 152);
|
||||
CHECK_OFFSET(xss_exit_bitmap, 160);
|
||||
CHECK_OFFSET(guest_physical_address, 168);
|
||||
CHECK_OFFSET(vmcs_link_pointer, 176);
|
||||
CHECK_OFFSET(guest_ia32_debugctl, 184);
|
||||
CHECK_OFFSET(guest_ia32_pat, 192);
|
||||
CHECK_OFFSET(guest_ia32_efer, 200);
|
||||
CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208);
|
||||
CHECK_OFFSET(guest_pdptr0, 216);
|
||||
CHECK_OFFSET(guest_pdptr1, 224);
|
||||
CHECK_OFFSET(guest_pdptr2, 232);
|
||||
CHECK_OFFSET(guest_pdptr3, 240);
|
||||
CHECK_OFFSET(guest_bndcfgs, 248);
|
||||
CHECK_OFFSET(host_ia32_pat, 256);
|
||||
CHECK_OFFSET(host_ia32_efer, 264);
|
||||
CHECK_OFFSET(host_ia32_perf_global_ctrl, 272);
|
||||
CHECK_OFFSET(vmread_bitmap, 280);
|
||||
CHECK_OFFSET(vmwrite_bitmap, 288);
|
||||
CHECK_OFFSET(vm_function_control, 296);
|
||||
CHECK_OFFSET(eptp_list_address, 304);
|
||||
CHECK_OFFSET(pml_address, 312);
|
||||
CHECK_OFFSET(cr0_guest_host_mask, 344);
|
||||
CHECK_OFFSET(cr4_guest_host_mask, 352);
|
||||
CHECK_OFFSET(cr0_read_shadow, 360);
|
||||
CHECK_OFFSET(cr4_read_shadow, 368);
|
||||
CHECK_OFFSET(cr3_target_value0, 376);
|
||||
CHECK_OFFSET(cr3_target_value1, 384);
|
||||
CHECK_OFFSET(cr3_target_value2, 392);
|
||||
CHECK_OFFSET(cr3_target_value3, 400);
|
||||
CHECK_OFFSET(exit_qualification, 408);
|
||||
CHECK_OFFSET(guest_linear_address, 416);
|
||||
CHECK_OFFSET(guest_cr0, 424);
|
||||
CHECK_OFFSET(guest_cr3, 432);
|
||||
CHECK_OFFSET(guest_cr4, 440);
|
||||
CHECK_OFFSET(guest_es_base, 448);
|
||||
CHECK_OFFSET(guest_cs_base, 456);
|
||||
CHECK_OFFSET(guest_ss_base, 464);
|
||||
CHECK_OFFSET(guest_ds_base, 472);
|
||||
CHECK_OFFSET(guest_fs_base, 480);
|
||||
CHECK_OFFSET(guest_gs_base, 488);
|
||||
CHECK_OFFSET(guest_ldtr_base, 496);
|
||||
CHECK_OFFSET(guest_tr_base, 504);
|
||||
CHECK_OFFSET(guest_gdtr_base, 512);
|
||||
CHECK_OFFSET(guest_idtr_base, 520);
|
||||
CHECK_OFFSET(guest_dr7, 528);
|
||||
CHECK_OFFSET(guest_rsp, 536);
|
||||
CHECK_OFFSET(guest_rip, 544);
|
||||
CHECK_OFFSET(guest_rflags, 552);
|
||||
CHECK_OFFSET(guest_pending_dbg_exceptions, 560);
|
||||
CHECK_OFFSET(guest_sysenter_esp, 568);
|
||||
CHECK_OFFSET(guest_sysenter_eip, 576);
|
||||
CHECK_OFFSET(host_cr0, 584);
|
||||
CHECK_OFFSET(host_cr3, 592);
|
||||
CHECK_OFFSET(host_cr4, 600);
|
||||
CHECK_OFFSET(host_fs_base, 608);
|
||||
CHECK_OFFSET(host_gs_base, 616);
|
||||
CHECK_OFFSET(host_tr_base, 624);
|
||||
CHECK_OFFSET(host_gdtr_base, 632);
|
||||
CHECK_OFFSET(host_idtr_base, 640);
|
||||
CHECK_OFFSET(host_ia32_sysenter_esp, 648);
|
||||
CHECK_OFFSET(host_ia32_sysenter_eip, 656);
|
||||
CHECK_OFFSET(host_rsp, 664);
|
||||
CHECK_OFFSET(host_rip, 672);
|
||||
CHECK_OFFSET(pin_based_vm_exec_control, 744);
|
||||
CHECK_OFFSET(cpu_based_vm_exec_control, 748);
|
||||
CHECK_OFFSET(exception_bitmap, 752);
|
||||
CHECK_OFFSET(page_fault_error_code_mask, 756);
|
||||
CHECK_OFFSET(page_fault_error_code_match, 760);
|
||||
CHECK_OFFSET(cr3_target_count, 764);
|
||||
CHECK_OFFSET(vm_exit_controls, 768);
|
||||
CHECK_OFFSET(vm_exit_msr_store_count, 772);
|
||||
CHECK_OFFSET(vm_exit_msr_load_count, 776);
|
||||
CHECK_OFFSET(vm_entry_controls, 780);
|
||||
CHECK_OFFSET(vm_entry_msr_load_count, 784);
|
||||
CHECK_OFFSET(vm_entry_intr_info_field, 788);
|
||||
CHECK_OFFSET(vm_entry_exception_error_code, 792);
|
||||
CHECK_OFFSET(vm_entry_instruction_len, 796);
|
||||
CHECK_OFFSET(tpr_threshold, 800);
|
||||
CHECK_OFFSET(secondary_vm_exec_control, 804);
|
||||
CHECK_OFFSET(vm_instruction_error, 808);
|
||||
CHECK_OFFSET(vm_exit_reason, 812);
|
||||
CHECK_OFFSET(vm_exit_intr_info, 816);
|
||||
CHECK_OFFSET(vm_exit_intr_error_code, 820);
|
||||
CHECK_OFFSET(idt_vectoring_info_field, 824);
|
||||
CHECK_OFFSET(idt_vectoring_error_code, 828);
|
||||
CHECK_OFFSET(vm_exit_instruction_len, 832);
|
||||
CHECK_OFFSET(vmx_instruction_info, 836);
|
||||
CHECK_OFFSET(guest_es_limit, 840);
|
||||
CHECK_OFFSET(guest_cs_limit, 844);
|
||||
CHECK_OFFSET(guest_ss_limit, 848);
|
||||
CHECK_OFFSET(guest_ds_limit, 852);
|
||||
CHECK_OFFSET(guest_fs_limit, 856);
|
||||
CHECK_OFFSET(guest_gs_limit, 860);
|
||||
CHECK_OFFSET(guest_ldtr_limit, 864);
|
||||
CHECK_OFFSET(guest_tr_limit, 868);
|
||||
CHECK_OFFSET(guest_gdtr_limit, 872);
|
||||
CHECK_OFFSET(guest_idtr_limit, 876);
|
||||
CHECK_OFFSET(guest_es_ar_bytes, 880);
|
||||
CHECK_OFFSET(guest_cs_ar_bytes, 884);
|
||||
CHECK_OFFSET(guest_ss_ar_bytes, 888);
|
||||
CHECK_OFFSET(guest_ds_ar_bytes, 892);
|
||||
CHECK_OFFSET(guest_fs_ar_bytes, 896);
|
||||
CHECK_OFFSET(guest_gs_ar_bytes, 900);
|
||||
CHECK_OFFSET(guest_ldtr_ar_bytes, 904);
|
||||
CHECK_OFFSET(guest_tr_ar_bytes, 908);
|
||||
CHECK_OFFSET(guest_interruptibility_info, 912);
|
||||
CHECK_OFFSET(guest_activity_state, 916);
|
||||
CHECK_OFFSET(guest_sysenter_cs, 920);
|
||||
CHECK_OFFSET(host_ia32_sysenter_cs, 924);
|
||||
CHECK_OFFSET(vmx_preemption_timer_value, 928);
|
||||
CHECK_OFFSET(virtual_processor_id, 960);
|
||||
CHECK_OFFSET(posted_intr_nv, 962);
|
||||
CHECK_OFFSET(guest_es_selector, 964);
|
||||
CHECK_OFFSET(guest_cs_selector, 966);
|
||||
CHECK_OFFSET(guest_ss_selector, 968);
|
||||
CHECK_OFFSET(guest_ds_selector, 970);
|
||||
CHECK_OFFSET(guest_fs_selector, 972);
|
||||
CHECK_OFFSET(guest_gs_selector, 974);
|
||||
CHECK_OFFSET(guest_ldtr_selector, 976);
|
||||
CHECK_OFFSET(guest_tr_selector, 978);
|
||||
CHECK_OFFSET(guest_intr_status, 980);
|
||||
CHECK_OFFSET(host_es_selector, 982);
|
||||
CHECK_OFFSET(host_cs_selector, 984);
|
||||
CHECK_OFFSET(host_ss_selector, 986);
|
||||
CHECK_OFFSET(host_ds_selector, 988);
|
||||
CHECK_OFFSET(host_fs_selector, 990);
|
||||
CHECK_OFFSET(host_gs_selector, 992);
|
||||
CHECK_OFFSET(host_tr_selector, 994);
|
||||
CHECK_OFFSET(guest_pml_index, 996);
|
||||
}

extern const unsigned short vmcs_field_to_offset_table[];
extern const unsigned int nr_vmcs12_fields;

#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))

static inline short vmcs_field_to_offset(unsigned long field)
{
	unsigned short offset;
	unsigned int index;

	if (field >> 15)
		return -ENOENT;

	index = ROL16(field, 6);
	if (index >= nr_vmcs12_fields)
		return -ENOENT;

	index = array_index_nospec(index, nr_vmcs12_fields);
	offset = vmcs_field_to_offset_table[index];
	if (offset == 0)
		return -ENOENT;
	return offset;
}

#undef ROL16

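For orientation, the ROL16 rotation above compacts the sparse VMCS field encodings into a much smaller index space for vmcs_field_to_offset_table: rotating the 16-bit encoding left by 6 moves its high-order bits into the low bits, so the largest possible table index stays far smaller than the raw encoding would be. A minimal standalone sketch of the same computation follows; the concrete 0x0800 value is illustrative (it is the architectural encoding of the guest ES selector field), and nothing below is code from this patch:

#include <stdint.h>
#include <stdio.h>

/* Same rotation the header uses to index vmcs_field_to_offset_table. */
static uint16_t rol16(uint16_t val, int n)
{
	return (uint16_t)((val << n) | (val >> (16 - n)));
}

int main(void)
{
	unsigned long field = 0x0800;	/* illustrative 16-bit guest-state field encoding */

	if (field >> 15)		/* encodings wider than 15 bits are rejected above */
		return 1;
	printf("encoding 0x%04lx -> table index %u\n",
	       field, (unsigned int)rol16((uint16_t)field, 6));
	return 0;
}

Assuming the offset table is populated with the same rotation, the printed index is the slot that holds the field's byte offset within struct vmcs12.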
/*
 * Read a vmcs12 field. Since these can have varying lengths and we return
 * one type, we chose the biggest type (u64) and zero-extend the return value
 * to that size. Note that the caller, handle_vmread, might need to use only
 * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
 * 64-bit fields are to be returned).
 */
static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
				  unsigned long field, u64 *ret)
{
	short offset = vmcs_field_to_offset(field);
	char *p;

	if (offset < 0)
		return offset;

	p = (char *)vmcs12 + offset;

	switch (vmcs_field_width(field)) {
	case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
		*ret = *((natural_width *)p);
		return 0;
	case VMCS_FIELD_WIDTH_U16:
		*ret = *((u16 *)p);
		return 0;
	case VMCS_FIELD_WIDTH_U32:
		*ret = *((u32 *)p);
		return 0;
	case VMCS_FIELD_WIDTH_U64:
		*ret = *((u64 *)p);
		return 0;
	default:
		WARN_ON(1);
		return -ENOENT;
	}
}

static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
				   unsigned long field, u64 field_value){
	short offset = vmcs_field_to_offset(field);
	char *p = (char *)vmcs12 + offset;

	if (offset < 0)
		return offset;

	switch (vmcs_field_width(field)) {
	case VMCS_FIELD_WIDTH_U16:
		*(u16 *)p = field_value;
		return 0;
	case VMCS_FIELD_WIDTH_U32:
		*(u32 *)p = field_value;
		return 0;
	case VMCS_FIELD_WIDTH_U64:
		*(u64 *)p = field_value;
		return 0;
	case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
		*(natural_width *)p = field_value;
		return 0;
	default:
		WARN_ON(1);
		return -ENOENT;
	}

}

#endif /* __KVM_X86_VMX_VMCS12_H */

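As a usage illustration only (not code from this patch), a VMREAD-emulation-style caller passes the cached vmcs12 plus an architectural field encoding and receives a zero-extended u64. GUEST_RIP from asm/vmx.h stands in here for any supported field, and the helper name is made up:

/*
 * Illustrative sketch.  'vmcs12' would be KVM's in-kernel cache of L1's
 * VMCS (see cached_vmcs12 in struct nested_vmx further down); GUEST_RIP
 * is the architectural field encoding from asm/vmx.h.
 */
static u64 example_read_l2_rip(struct vmcs12 *vmcs12)
{
	u64 value;

	if (vmcs12_read_any(vmcs12, GUEST_RIP, &value))
		return 0;	/* unknown or unsupported field encoding */
	return value;
}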
arch/x86/kvm/vmx/vmenter.S (new file, 57 lines)
@@ -0,0 +1,57 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>

	.text

/**
 * vmx_vmenter - VM-Enter the current loaded VMCS
 *
 * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
 *
 * Returns:
 *	%RFLAGS.CF is set on VM-Fail Invalid
 *	%RFLAGS.ZF is set on VM-Fail Valid
 *	%RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
 *
 * Note that VMRESUME/VMLAUNCH fall-through and return directly if
 * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
 * to vmx_vmexit.
 */
ENTRY(vmx_vmenter)
	/* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
	je 2f

1:	vmresume
	ret

2:	vmlaunch
	ret

3:	cmpb $0, kvm_rebooting
	jne 4f
	call kvm_spurious_fault
4:	ret

	.pushsection .fixup, "ax"
5:	jmp 3b
	.popsection

	_ASM_EXTABLE(1b, 5b)
	_ASM_EXTABLE(2b, 5b)

ENDPROC(vmx_vmenter)

/**
 * vmx_vmexit - Handle a VMX VM-Exit
 *
 * Returns:
 *	%RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
 *
 * This is vmx_vmenter's partner in crime.  On a VM-Exit, control will jump
 * here after hardware loads the host's state, i.e. this is the destination
 * referred to by VMCS.HOST_RIP.
 */
ENTRY(vmx_vmexit)
	ret
ENDPROC(vmx_vmexit)
arch/x86/kvm/vmx/vmx.c (new file, 7935 lines): file diff suppressed because it is too large

arch/x86/kvm/vmx/vmx.h (new file, 519 lines)
@@ -0,0 +1,519 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __KVM_X86_VMX_H
|
||||
#define __KVM_X86_VMX_H
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/kvm.h>
|
||||
#include <asm/intel_pt.h>
|
||||
|
||||
#include "capabilities.h"
|
||||
#include "ops.h"
|
||||
#include "vmcs.h"
|
||||
|
||||
extern const u32 vmx_msr_index[];
|
||||
extern u64 host_efer;
|
||||
|
||||
#define MSR_TYPE_R 1
|
||||
#define MSR_TYPE_W 2
|
||||
#define MSR_TYPE_RW 3
|
||||
|
||||
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
|
||||
|
||||
#define NR_AUTOLOAD_MSRS 8
|
||||
|
||||
struct vmx_msrs {
|
||||
unsigned int nr;
|
||||
struct vmx_msr_entry val[NR_AUTOLOAD_MSRS];
|
||||
};
|
||||
|
||||
struct shared_msr_entry {
|
||||
unsigned index;
|
||||
u64 data;
|
||||
u64 mask;
|
||||
};
|
||||
|
||||
enum segment_cache_field {
|
||||
SEG_FIELD_SEL = 0,
|
||||
SEG_FIELD_BASE = 1,
|
||||
SEG_FIELD_LIMIT = 2,
|
||||
SEG_FIELD_AR = 3,
|
||||
|
||||
SEG_FIELD_NR = 4
|
||||
};
|
||||
|
||||
/* Posted-Interrupt Descriptor */
struct pi_desc {
	u32 pir[8];     /* Posted interrupt requested */
	union {
		struct {
				/* bit 256 - Outstanding Notification */
			u16	on	: 1,
				/* bit 257 - Suppress Notification */
				sn	: 1,
				/* bit 271:258 - Reserved */
				rsvd_1	: 14;
				/* bit 279:272 - Notification Vector */
			u8	nv;
				/* bit 287:280 - Reserved */
			u8	rsvd_2;
				/* bit 319:288 - Notification Destination */
			u32	ndst;
		};
		u64 control;
	};
	u32 rsvd[6];
} __aligned(64);
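The pi_test_and_set_pir()/pi_test_and_set_on() helpers declared later in this header operate on this descriptor. As a rough sketch of the posting sequence, with the notification IPI and the memory ordering that KVM's real delivery path in vmx.c also takes care of deliberately left out (everything below is illustrative, not code from this patch):

/*
 * Illustrative sketch: queue 'vector' in the PIR and raise the
 * Outstanding Notification (ON) bit.  Returns true when no notification
 * IPI would be needed, i.e. the vector was already pending or ON was
 * already set; sending the IPI itself is not shown here.
 */
static bool example_post_interrupt(struct pi_desc *pi_desc, int vector)
{
	if (pi_test_and_set_pir(vector, pi_desc))
		return true;

	return pi_test_and_set_on(pi_desc);
}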
|
||||
|
||||
#define RTIT_ADDR_RANGE 4
|
||||
|
||||
struct pt_ctx {
|
||||
u64 ctl;
|
||||
u64 status;
|
||||
u64 output_base;
|
||||
u64 output_mask;
|
||||
u64 cr3_match;
|
||||
u64 addr_a[RTIT_ADDR_RANGE];
|
||||
u64 addr_b[RTIT_ADDR_RANGE];
|
||||
};
|
||||
|
||||
struct pt_desc {
|
||||
u64 ctl_bitmask;
|
||||
u32 addr_range;
|
||||
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
|
||||
struct pt_ctx host;
|
||||
struct pt_ctx guest;
|
||||
};
|
||||
|
||||
/*
|
||||
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
|
||||
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
|
||||
*/
|
||||
struct nested_vmx {
|
||||
/* Has the level1 guest done vmxon? */
|
||||
bool vmxon;
|
||||
gpa_t vmxon_ptr;
|
||||
bool pml_full;
|
||||
|
||||
/* The guest-physical address of the current VMCS L1 keeps for L2 */
|
||||
gpa_t current_vmptr;
|
||||
/*
|
||||
* Cache of the guest's VMCS, existing outside of guest memory.
|
||||
* Loaded from guest memory during VMPTRLD. Flushed to guest
|
||||
* memory during VMCLEAR and VMPTRLD.
|
||||
*/
|
||||
struct vmcs12 *cached_vmcs12;
|
||||
/*
|
||||
* Cache of the guest's shadow VMCS, existing outside of guest
|
||||
* memory. Loaded from guest memory during VM entry. Flushed
|
||||
* to guest memory during VM exit.
|
||||
*/
|
||||
struct vmcs12 *cached_shadow_vmcs12;
|
||||
/*
|
||||
* Indicates if the shadow vmcs or enlightened vmcs must be updated
|
||||
* with the data held by struct vmcs12.
|
||||
*/
|
||||
bool need_vmcs12_sync;
|
||||
bool dirty_vmcs12;
|
||||
|
||||
/*
|
||||
* vmcs02 has been initialized, i.e. state that is constant for
|
||||
* vmcs02 has been written to the backing VMCS. Initialization
|
||||
* is delayed until L1 actually attempts to run a nested VM.
|
||||
*/
|
||||
bool vmcs02_initialized;
|
||||
|
||||
bool change_vmcs01_virtual_apic_mode;
|
||||
|
||||
/*
|
||||
* Enlightened VMCS has been enabled. It does not mean that L1 has to
|
||||
* use it. However, VMX features available to L1 will be limited based
|
||||
* on what the enlightened VMCS supports.
|
||||
*/
|
||||
bool enlightened_vmcs_enabled;
|
||||
|
||||
/* L2 must run next, and mustn't decide to exit to L1. */
|
||||
bool nested_run_pending;
|
||||
|
||||
struct loaded_vmcs vmcs02;
|
||||
|
||||
/*
|
||||
* Guest pages referred to in the vmcs02 with host-physical
|
||||
* pointers, so we must keep them pinned while L2 runs.
|
||||
*/
|
||||
struct page *apic_access_page;
|
||||
struct page *virtual_apic_page;
|
||||
struct page *pi_desc_page;
|
||||
struct pi_desc *pi_desc;
|
||||
bool pi_pending;
|
||||
u16 posted_intr_nv;
|
||||
|
||||
struct hrtimer preemption_timer;
|
||||
bool preemption_timer_expired;
|
||||
|
||||
/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
|
||||
u64 vmcs01_debugctl;
|
||||
u64 vmcs01_guest_bndcfgs;
|
||||
|
||||
u16 vpid02;
|
||||
u16 last_vpid;
|
||||
|
||||
struct nested_vmx_msrs msrs;
|
||||
|
||||
/* SMM related state */
|
||||
struct {
|
||||
/* in VMX operation on SMM entry? */
|
||||
bool vmxon;
|
||||
/* in guest mode on SMM entry? */
|
||||
bool guest_mode;
|
||||
} smm;
|
||||
|
||||
gpa_t hv_evmcs_vmptr;
|
||||
struct page *hv_evmcs_page;
|
||||
struct hv_enlightened_vmcs *hv_evmcs;
|
||||
};
|
||||
|
||||
struct vcpu_vmx {
|
||||
struct kvm_vcpu vcpu;
|
||||
unsigned long host_rsp;
|
||||
u8 fail;
|
||||
u8 msr_bitmap_mode;
|
||||
u32 exit_intr_info;
|
||||
u32 idt_vectoring_info;
|
||||
ulong rflags;
|
||||
struct shared_msr_entry *guest_msrs;
|
||||
int nmsrs;
|
||||
int save_nmsrs;
|
||||
bool guest_msrs_dirty;
|
||||
unsigned long host_idt_base;
|
||||
#ifdef CONFIG_X86_64
|
||||
u64 msr_host_kernel_gs_base;
|
||||
u64 msr_guest_kernel_gs_base;
|
||||
#endif
|
||||
|
||||
u64 arch_capabilities;
|
||||
u64 spec_ctrl;
|
||||
|
||||
u32 vm_entry_controls_shadow;
|
||||
u32 vm_exit_controls_shadow;
|
||||
u32 secondary_exec_control;
|
||||
|
||||
/*
|
||||
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
|
||||
* non-nested (L1) guest, it always points to vmcs01. For a nested
|
||||
* guest (L2), it points to a different VMCS. loaded_cpu_state points
|
||||
* to the VMCS whose state is loaded into the CPU registers that only
|
||||
* need to be switched when transitioning to/from the kernel; a NULL
|
||||
* value indicates that host state is loaded.
|
||||
*/
|
||||
struct loaded_vmcs vmcs01;
|
||||
struct loaded_vmcs *loaded_vmcs;
|
||||
struct loaded_vmcs *loaded_cpu_state;
|
||||
bool __launched; /* temporary, used in vmx_vcpu_run */
|
||||
struct msr_autoload {
|
||||
struct vmx_msrs guest;
|
||||
struct vmx_msrs host;
|
||||
} msr_autoload;
|
||||
|
||||
struct {
|
||||
int vm86_active;
|
||||
ulong save_rflags;
|
||||
struct kvm_segment segs[8];
|
||||
} rmode;
|
||||
struct {
|
||||
u32 bitmask; /* 4 bits per segment (1 bit per field) */
|
||||
struct kvm_save_segment {
|
||||
u16 selector;
|
||||
unsigned long base;
|
||||
u32 limit;
|
||||
u32 ar;
|
||||
} seg[8];
|
||||
} segment_cache;
|
||||
int vpid;
|
||||
bool emulation_required;
|
||||
|
||||
u32 exit_reason;
|
||||
|
||||
/* Posted interrupt descriptor */
|
||||
struct pi_desc pi_desc;
|
||||
|
||||
/* Support for a guest hypervisor (nested VMX) */
|
||||
struct nested_vmx nested;
|
||||
|
||||
/* Dynamic PLE window. */
|
||||
int ple_window;
|
||||
bool ple_window_dirty;
|
||||
|
||||
bool req_immediate_exit;
|
||||
|
||||
/* Support for PML */
|
||||
#define PML_ENTITY_NUM 512
|
||||
struct page *pml_pg;
|
||||
|
||||
/* apic deadline value in host tsc */
|
||||
u64 hv_deadline_tsc;
|
||||
|
||||
u64 current_tsc_ratio;
|
||||
|
||||
u32 host_pkru;
|
||||
|
||||
unsigned long host_debugctlmsr;
|
||||
|
||||
/*
|
||||
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
|
||||
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
|
||||
* in msr_ia32_feature_control_valid_bits.
|
||||
*/
|
||||
u64 msr_ia32_feature_control;
|
||||
u64 msr_ia32_feature_control_valid_bits;
|
||||
u64 ept_pointer;
|
||||
|
||||
struct pt_desc pt_desc;
|
||||
};
|
||||
|
||||
enum ept_pointers_status {
|
||||
EPT_POINTERS_CHECK = 0,
|
||||
EPT_POINTERS_MATCH = 1,
|
||||
EPT_POINTERS_MISMATCH = 2
|
||||
};
|
||||
|
||||
struct kvm_vmx {
|
||||
struct kvm kvm;
|
||||
|
||||
unsigned int tss_addr;
|
||||
bool ept_identity_pagetable_done;
|
||||
gpa_t ept_identity_map_addr;
|
||||
|
||||
enum ept_pointers_status ept_pointers_match;
|
||||
spinlock_t ept_pointer_lock;
|
||||
};
|
||||
|
||||
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
|
||||
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
|
||||
void vmx_vcpu_put(struct kvm_vcpu *vcpu);
|
||||
int allocate_vpid(void);
|
||||
void free_vpid(int vpid);
|
||||
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
|
||||
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
|
||||
int vmx_get_cpl(struct kvm_vcpu *vcpu);
|
||||
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
|
||||
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
|
||||
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
|
||||
void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
|
||||
void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
|
||||
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
|
||||
void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
|
||||
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
|
||||
void ept_save_pdptrs(struct kvm_vcpu *vcpu);
|
||||
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
|
||||
void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
|
||||
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
|
||||
void update_exception_bitmap(struct kvm_vcpu *vcpu);
|
||||
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
|
||||
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
|
||||
void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
|
||||
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
|
||||
struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
|
||||
void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
|
||||
|
||||
#define POSTED_INTR_ON 0
|
||||
#define POSTED_INTR_SN 1
|
||||
|
||||
static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
|
||||
{
|
||||
return test_and_set_bit(POSTED_INTR_ON,
|
||||
(unsigned long *)&pi_desc->control);
|
||||
}
|
||||
|
||||
static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
|
||||
{
|
||||
return test_and_clear_bit(POSTED_INTR_ON,
|
||||
(unsigned long *)&pi_desc->control);
|
||||
}
|
||||
|
||||
static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
|
||||
{
|
||||
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
|
||||
}
|
||||
|
||||
static inline void pi_clear_sn(struct pi_desc *pi_desc)
|
||||
{
|
||||
return clear_bit(POSTED_INTR_SN,
|
||||
(unsigned long *)&pi_desc->control);
|
||||
}
|
||||
|
||||
static inline void pi_set_sn(struct pi_desc *pi_desc)
|
||||
{
|
||||
return set_bit(POSTED_INTR_SN,
|
||||
(unsigned long *)&pi_desc->control);
|
||||
}
|
||||
|
||||
static inline void pi_clear_on(struct pi_desc *pi_desc)
|
||||
{
|
||||
clear_bit(POSTED_INTR_ON,
|
||||
(unsigned long *)&pi_desc->control);
|
||||
}
|
||||
|
||||
static inline int pi_test_on(struct pi_desc *pi_desc)
|
||||
{
|
||||
return test_bit(POSTED_INTR_ON,
|
||||
(unsigned long *)&pi_desc->control);
|
||||
}
|
||||
|
||||
static inline int pi_test_sn(struct pi_desc *pi_desc)
|
||||
{
|
||||
return test_bit(POSTED_INTR_SN,
|
||||
(unsigned long *)&pi_desc->control);
|
||||
}
|
||||
|
||||
static inline u8 vmx_get_rvi(void)
|
||||
{
|
||||
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
|
||||
}
|
||||
|
||||
static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
|
||||
{
|
||||
vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
|
||||
}
|
||||
|
||||
static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
|
||||
{
|
||||
vmcs_write32(VM_ENTRY_CONTROLS, val);
|
||||
vmx->vm_entry_controls_shadow = val;
|
||||
}
|
||||
|
||||
static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
|
||||
{
|
||||
if (vmx->vm_entry_controls_shadow != val)
|
||||
vm_entry_controls_init(vmx, val);
|
||||
}
|
||||
|
||||
static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
|
||||
{
|
||||
return vmx->vm_entry_controls_shadow;
|
||||
}
|
||||
|
||||
static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
|
||||
{
|
||||
vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
|
||||
}
|
||||
|
||||
static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
|
||||
{
|
||||
vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
|
||||
}
|
||||
|
||||
static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
|
||||
{
|
||||
vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
|
||||
}
|
||||
|
||||
static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
|
||||
{
|
||||
vmcs_write32(VM_EXIT_CONTROLS, val);
|
||||
vmx->vm_exit_controls_shadow = val;
|
||||
}
|
||||
|
||||
static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
|
||||
{
|
||||
if (vmx->vm_exit_controls_shadow != val)
|
||||
vm_exit_controls_init(vmx, val);
|
||||
}
|
||||
|
||||
static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
|
||||
{
|
||||
return vmx->vm_exit_controls_shadow;
|
||||
}
|
||||
|
||||
static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
|
||||
{
|
||||
vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
|
||||
}
|
||||
|
||||
static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
|
||||
{
|
||||
vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
|
||||
}
|
||||
|
||||
static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
|
||||
{
|
||||
vmx->segment_cache.bitmask = 0;
|
||||
}
|
||||
|
||||
static inline u32 vmx_vmentry_ctrl(void)
|
||||
{
|
||||
u32 vmentry_ctrl = vmcs_config.vmentry_ctrl;
|
||||
if (pt_mode == PT_MODE_SYSTEM)
|
||||
vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL);
|
||||
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
|
||||
return vmentry_ctrl &
|
||||
~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
|
||||
}
|
||||
|
||||
static inline u32 vmx_vmexit_ctrl(void)
|
||||
{
|
||||
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
|
||||
if (pt_mode == PT_MODE_SYSTEM)
|
||||
vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL);
|
||||
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
|
||||
return vmcs_config.vmexit_ctrl &
|
||||
~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
|
||||
}
|
||||
|
||||
u32 vmx_exec_control(struct vcpu_vmx *vmx);
|
||||
|
||||
static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
|
||||
{
|
||||
return container_of(kvm, struct kvm_vmx, kvm);
|
||||
}
|
||||
|
||||
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return container_of(vcpu, struct vcpu_vmx, vcpu);
|
||||
}
|
||||
|
||||
static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return &(to_vmx(vcpu)->pi_desc);
|
||||
}
|
||||
|
||||
struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu);
|
||||
void free_vmcs(struct vmcs *vmcs);
|
||||
int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
|
||||
void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs);
|
||||
void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs);
|
||||
void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs);
|
||||
|
||||
static inline struct vmcs *alloc_vmcs(bool shadow)
|
||||
{
|
||||
return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
|
||||
}
|
||||
|
||||
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
|
||||
|
||||
static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
|
||||
bool invalidate_gpa)
|
||||
{
|
||||
if (enable_ept && (invalidate_gpa || !enable_vpid)) {
|
||||
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
|
||||
return;
|
||||
ept_sync_context(construct_eptp(vcpu,
|
||||
vcpu->arch.mmu->root_hpa));
|
||||
} else {
|
||||
vpid_sync_context(vpid);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
|
||||
{
|
||||
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
|
||||
}
|
||||
|
||||
static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
|
||||
{
|
||||
vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
|
||||
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
|
||||
}
|
||||
|
||||
#endif /* __KVM_X86_VMX_H */
|
@ -69,6 +69,7 @@
|
||||
#include <asm/irq_remapping.h>
|
||||
#include <asm/mshyperv.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/intel_pt.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
@ -213,6 +214,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
|
||||
|
||||
u64 __read_mostly host_xcr0;
|
||||
|
||||
struct kmem_cache *x86_fpu_cache;
|
||||
EXPORT_SYMBOL_GPL(x86_fpu_cache);
|
||||
|
||||
static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
|
||||
|
||||
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
|
||||
@ -1121,7 +1125,13 @@ static u32 msrs_to_save[] = {
|
||||
#endif
|
||||
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
|
||||
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
|
||||
MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
|
||||
MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES,
|
||||
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
|
||||
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
|
||||
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
|
||||
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
|
||||
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
|
||||
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
|
||||
};
|
||||
|
||||
static unsigned num_msrs_to_save;
|
||||
@ -2999,6 +3009,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_HYPERV_TLBFLUSH:
|
||||
case KVM_CAP_HYPERV_SEND_IPI:
|
||||
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
|
||||
case KVM_CAP_HYPERV_CPUID:
|
||||
case KVM_CAP_PCI_SEGMENT:
|
||||
case KVM_CAP_DEBUGREGS:
|
||||
case KVM_CAP_X86_ROBUST_SINGLESTEP:
|
||||
@ -3010,7 +3021,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_HYPERV_TIME:
|
||||
case KVM_CAP_IOAPIC_POLARITY_IGNORED:
|
||||
case KVM_CAP_TSC_DEADLINE_TIMER:
|
||||
case KVM_CAP_ENABLE_CAP_VM:
|
||||
case KVM_CAP_DISABLE_QUIRKS:
|
||||
case KVM_CAP_SET_BOOT_CPU_ID:
|
||||
case KVM_CAP_SPLIT_IRQCHIP:
|
||||
@ -3632,7 +3642,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
|
||||
|
||||
static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
|
||||
struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
|
||||
u64 xstate_bv = xsave->header.xfeatures;
|
||||
u64 valid;
|
||||
|
||||
@ -3674,7 +3684,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
|
||||
|
||||
static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
|
||||
{
|
||||
struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
|
||||
struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
|
||||
u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
|
||||
u64 valid;
|
||||
|
||||
@ -3722,7 +3732,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
|
||||
fill_xsave((u8 *) guest_xsave->region, vcpu);
|
||||
} else {
|
||||
memcpy(guest_xsave->region,
|
||||
&vcpu->arch.guest_fpu.state.fxsave,
|
||||
&vcpu->arch.guest_fpu->state.fxsave,
|
||||
sizeof(struct fxregs_state));
|
||||
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
|
||||
XFEATURE_MASK_FPSSE;
|
||||
@ -3752,7 +3762,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
|
||||
if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
|
||||
mxcsr & ~mxcsr_feature_mask)
|
||||
return -EINVAL;
|
||||
memcpy(&vcpu->arch.guest_fpu.state.fxsave,
|
||||
memcpy(&vcpu->arch.guest_fpu->state.fxsave,
|
||||
guest_xsave->region, sizeof(struct fxregs_state));
|
||||
}
|
||||
return 0;
|
||||
@ -3830,6 +3840,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
||||
return kvm_hv_activate_synic(vcpu, cap->cap ==
|
||||
KVM_CAP_HYPERV_SYNIC2);
|
||||
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
|
||||
if (!kvm_x86_ops->nested_enable_evmcs)
|
||||
return -ENOTTY;
|
||||
r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
|
||||
if (!r) {
|
||||
user_ptr = (void __user *)(uintptr_t)cap->args[0];
|
||||
@ -4192,6 +4204,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
|
||||
break;
|
||||
}
|
||||
case KVM_GET_SUPPORTED_HV_CPUID: {
|
||||
struct kvm_cpuid2 __user *cpuid_arg = argp;
|
||||
struct kvm_cpuid2 cpuid;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
|
||||
goto out;
|
||||
|
||||
r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
|
||||
cpuid_arg->entries);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
|
||||
goto out;
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
@ -4396,7 +4427,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
|
||||
*/
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
{
|
||||
bool is_dirty = false;
|
||||
bool flush = false;
|
||||
int r;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
@ -4407,14 +4438,41 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
||||
if (kvm_x86_ops->flush_log_dirty)
|
||||
kvm_x86_ops->flush_log_dirty(kvm);
|
||||
|
||||
r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
|
||||
r = kvm_get_dirty_log_protect(kvm, log, &flush);
|
||||
|
||||
/*
|
||||
* All the TLBs can be flushed out of mmu lock, see the comments in
|
||||
* kvm_mmu_slot_remove_write_access().
|
||||
*/
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
if (is_dirty)
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
|
||||
{
|
||||
bool flush = false;
|
||||
int r;
|
||||
|
||||
mutex_lock(&kvm->slots_lock);
|
||||
|
||||
/*
|
||||
* Flush potentially hardware-cached dirty pages to dirty_bitmap.
|
||||
*/
|
||||
if (kvm_x86_ops->flush_log_dirty)
|
||||
kvm_x86_ops->flush_log_dirty(kvm);
|
||||
|
||||
r = kvm_clear_dirty_log_protect(kvm, log, &flush);
|
||||
|
||||
/*
|
||||
* All the TLBs can be flushed out of mmu lock, see the comments in
|
||||
* kvm_mmu_slot_remove_write_access().
|
||||
*/
|
||||
lockdep_assert_held(&kvm->slots_lock);
|
||||
if (flush)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
|
||||
mutex_unlock(&kvm->slots_lock);
|
||||
@ -4433,8 +4491,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
struct kvm_enable_cap *cap)
|
||||
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
struct kvm_enable_cap *cap)
|
||||
{
|
||||
int r;
|
||||
|
||||
@ -4767,15 +4825,6 @@ set_identity_unlock:
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_ENABLE_CAP: {
|
||||
struct kvm_enable_cap cap;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&cap, argp, sizeof(cap)))
|
||||
goto out;
|
||||
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
|
||||
break;
|
||||
}
|
||||
case KVM_MEMORY_ENCRYPT_OP: {
|
||||
r = -ENOTTY;
|
||||
if (kvm_x86_ops->mem_enc_op)
|
||||
@ -4844,6 +4893,30 @@ static void kvm_init_msr_list(void)
|
||||
if (!kvm_x86_ops->rdtscp_supported())
|
||||
continue;
|
||||
break;
|
||||
case MSR_IA32_RTIT_CTL:
|
||||
case MSR_IA32_RTIT_STATUS:
|
||||
if (!kvm_x86_ops->pt_supported())
|
||||
continue;
|
||||
break;
|
||||
case MSR_IA32_RTIT_CR3_MATCH:
|
||||
if (!kvm_x86_ops->pt_supported() ||
|
||||
!intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
|
||||
continue;
|
||||
break;
|
||||
case MSR_IA32_RTIT_OUTPUT_BASE:
|
||||
case MSR_IA32_RTIT_OUTPUT_MASK:
|
||||
if (!kvm_x86_ops->pt_supported() ||
|
||||
(!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
|
||||
!intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
|
||||
continue;
|
||||
break;
|
||||
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
|
||||
if (!kvm_x86_ops->pt_supported() ||
|
||||
msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
|
||||
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -6815,11 +6888,30 @@ int kvm_arch_init(void *opaque)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM explicitly assumes that the guest has an FPU and
|
||||
* FXSAVE/FXRSTOR. For example, the KVM_GET_FPU explicitly casts the
|
||||
* vCPU's FPU state as a fxregs_state struct.
|
||||
*/
|
||||
if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
|
||||
printk(KERN_ERR "kvm: inadequate fpu\n");
|
||||
r = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
r = -ENOMEM;
|
||||
x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
|
||||
__alignof__(struct fpu), SLAB_ACCOUNT,
|
||||
NULL);
|
||||
if (!x86_fpu_cache) {
|
||||
printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
shared_msrs = alloc_percpu(struct kvm_shared_msrs);
|
||||
if (!shared_msrs) {
|
||||
printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
|
||||
goto out;
|
||||
goto out_free_x86_fpu_cache;
|
||||
}
|
||||
|
||||
r = kvm_mmu_module_init();
|
||||
@ -6852,6 +6944,8 @@ int kvm_arch_init(void *opaque)
|
||||
|
||||
out_free_percpu:
|
||||
free_percpu(shared_msrs);
|
||||
out_free_x86_fpu_cache:
|
||||
kmem_cache_destroy(x86_fpu_cache);
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
@ -6875,6 +6969,7 @@ void kvm_arch_exit(void)
|
||||
kvm_x86_ops = NULL;
|
||||
kvm_mmu_module_exit();
|
||||
free_percpu(shared_msrs);
|
||||
kmem_cache_destroy(x86_fpu_cache);
|
||||
}
|
||||
|
||||
int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
|
||||
@ -7998,9 +8093,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
|
||||
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
preempt_disable();
|
||||
copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
|
||||
copy_fpregs_to_fpstate(¤t->thread.fpu);
|
||||
/* PKRU is separately restored in kvm_x86_ops->run. */
|
||||
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
|
||||
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
|
||||
~XFEATURE_MASK_PKRU);
|
||||
preempt_enable();
|
||||
trace_kvm_fpu(1);
|
||||
@ -8010,8 +8105,8 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
|
||||
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
preempt_disable();
|
||||
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
|
||||
copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
|
||||
copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
|
||||
copy_kernel_to_fpregs(¤t->thread.fpu.state);
|
||||
preempt_enable();
|
||||
++vcpu->stat.fpu_reload;
|
||||
trace_kvm_fpu(0);
|
||||
@ -8505,7 +8600,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
|
||||
vcpu_load(vcpu);
|
||||
|
||||
fxsave = &vcpu->arch.guest_fpu.state.fxsave;
|
||||
fxsave = &vcpu->arch.guest_fpu->state.fxsave;
|
||||
memcpy(fpu->fpr, fxsave->st_space, 128);
|
||||
fpu->fcw = fxsave->cwd;
|
||||
fpu->fsw = fxsave->swd;
|
||||
@ -8525,7 +8620,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
||||
|
||||
vcpu_load(vcpu);
|
||||
|
||||
fxsave = &vcpu->arch.guest_fpu.state.fxsave;
|
||||
fxsave = &vcpu->arch.guest_fpu->state.fxsave;
|
||||
|
||||
memcpy(fxsave->st_space, fpu->fpr, 128);
|
||||
fxsave->cwd = fpu->fcw;
|
||||
@ -8581,9 +8676,9 @@ static int sync_regs(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void fx_init(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
fpstate_init(&vcpu->arch.guest_fpu.state);
|
||||
fpstate_init(&vcpu->arch.guest_fpu->state);
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVES))
|
||||
vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
|
||||
vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
|
||||
host_xcr0 | XSTATE_COMPACTION_ENABLED;
|
||||
|
||||
/*
|
||||
@ -8621,6 +8716,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
|
||||
|
||||
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
|
||||
kvm_vcpu_mtrr_init(vcpu);
|
||||
vcpu_load(vcpu);
|
||||
kvm_vcpu_reset(vcpu, false);
|
||||
@ -8707,11 +8803,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
*/
|
||||
if (init_event)
|
||||
kvm_put_guest_fpu(vcpu);
|
||||
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
|
||||
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
|
||||
XFEATURE_MASK_BNDREGS);
|
||||
if (mpx_state_buffer)
|
||||
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
|
||||
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
|
||||
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
|
||||
XFEATURE_MASK_BNDCSR);
|
||||
if (mpx_state_buffer)
|
||||
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
|
||||
@ -8723,7 +8819,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
kvm_pmu_reset(vcpu);
|
||||
vcpu->arch.smbase = 0x30000;
|
||||
|
||||
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
|
||||
vcpu->arch.msr_misc_features_enables = 0;
|
||||
|
||||
vcpu->arch.xcr0 = XFEATURE_MASK_FP;
|
||||
@ -9282,7 +9377,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
|
||||
* with dirty logging disabled in order to eliminate unnecessary GPA
|
||||
* logging in PML buffer (and potential PML buffer full VMEXT). This
|
||||
* guarantees leaving PML enabled during guest's lifetime won't have
|
||||
* any additonal overhead from PML when guest is running with dirty
|
||||
* any additional overhead from PML when guest is running with dirty
|
||||
* logging disabled for memory slots.
|
||||
*
|
||||
* kvm_x86_ops->slot_enable_log_dirty is called when switching new slot
|
||||
|
@ -143,7 +143,7 @@ static int hv_ce_shutdown(struct clock_event_device *evt)
|
||||
|
||||
static int hv_ce_set_oneshot(struct clock_event_device *evt)
|
||||
{
|
||||
union hv_timer_config timer_cfg;
|
||||
union hv_stimer_config timer_cfg;
|
||||
|
||||
timer_cfg.as_uint64 = 0;
|
||||
timer_cfg.enable = 1;
|
||||
|
@ -44,74 +44,6 @@
|
||||
*/
|
||||
#define HV_UTIL_NEGO_TIMEOUT 55
|
||||
|
||||
/* Define synthetic interrupt controller flag constants. */
|
||||
#define HV_EVENT_FLAGS_COUNT (256 * 8)
|
||||
#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
|
||||
|
||||
/*
|
||||
* Timer configuration register.
|
||||
*/
|
||||
union hv_timer_config {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 enable:1;
|
||||
u64 periodic:1;
|
||||
u64 lazy:1;
|
||||
u64 auto_enable:1;
|
||||
u64 apic_vector:8;
|
||||
u64 direct_mode:1;
|
||||
u64 reserved_z0:3;
|
||||
u64 sintx:4;
|
||||
u64 reserved_z1:44;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/* Define the synthetic interrupt controller event flags format. */
|
||||
union hv_synic_event_flags {
|
||||
unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
|
||||
};
|
||||
|
||||
/* Define SynIC control register. */
|
||||
union hv_synic_scontrol {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 enable:1;
|
||||
u64 reserved:63;
|
||||
};
|
||||
};
|
||||
|
||||
/* Define synthetic interrupt source. */
|
||||
union hv_synic_sint {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 vector:8;
|
||||
u64 reserved1:8;
|
||||
u64 masked:1;
|
||||
u64 auto_eoi:1;
|
||||
u64 reserved2:46;
|
||||
};
|
||||
};
|
||||
|
||||
/* Define the format of the SIMP register */
|
||||
union hv_synic_simp {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 simp_enabled:1;
|
||||
u64 preserved:11;
|
||||
u64 base_simp_gpa:52;
|
||||
};
|
||||
};
|
||||
|
||||
/* Define the format of the SIEFP register */
|
||||
union hv_synic_siefp {
|
||||
u64 as_uint64;
|
||||
struct {
|
||||
u64 siefp_enabled:1;
|
||||
u64 preserved:11;
|
||||
u64 base_siefp_gpa:52;
|
||||
};
|
||||
};
|
||||
|
||||
/* Definitions for the monitored notification facility */
|
||||
union hv_monitor_trigger_group {
|
||||
|
@ -21,7 +21,6 @@
|
||||
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
struct arch_timer_context {
|
||||
/* Registers: control register, timer value */
|
||||
@ -52,9 +51,6 @@ struct arch_timer_cpu {
|
||||
/* Background timer used when the guest is not running */
|
||||
struct hrtimer bg_timer;
|
||||
|
||||
/* Work queued with the above timer expires */
|
||||
struct work_struct expired;
|
||||
|
||||
/* Physical timer emulation */
|
||||
struct hrtimer phys_timer;
|
||||
|
||||
|
@ -37,7 +37,6 @@
|
||||
# define __GCC4_has_attribute___designated_init__ 0
|
||||
# define __GCC4_has_attribute___externally_visible__ 1
|
||||
# define __GCC4_has_attribute___noclone__ 1
|
||||
# define __GCC4_has_attribute___optimize__ 1
|
||||
# define __GCC4_has_attribute___nonstring__ 0
|
||||
# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
|
||||
#endif
|
||||
@ -163,17 +162,11 @@
|
||||
|
||||
/*
|
||||
* Optional: not supported by clang
|
||||
* Note: icc does not recognize gcc's no-tracer
|
||||
*
|
||||
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noclone-function-attribute
|
||||
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-optimize-function-attribute
|
||||
*/
|
||||
#if __has_attribute(__noclone__)
|
||||
# if __has_attribute(__optimize__)
|
||||
# define __noclone __attribute__((__noclone__, __optimize__("no-tracer")))
|
||||
# else
|
||||
# define __noclone __attribute__((__noclone__))
|
||||
# endif
|
||||
# define __noclone __attribute__((__noclone__))
|
||||
#else
|
||||
# define __noclone
|
||||
#endif
|
||||
|
@ -449,6 +449,7 @@ struct kvm {
|
||||
#endif
|
||||
long tlbs_dirty;
|
||||
struct list_head devices;
|
||||
bool manual_dirty_log_protect;
|
||||
struct dentry *debugfs_dentry;
|
||||
struct kvm_stat_data **debugfs_stat_data;
|
||||
struct srcu_struct srcu;
|
||||
@ -694,7 +695,8 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
|
||||
int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
void *data, unsigned long len);
|
||||
int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
void *data, int offset, unsigned long len);
|
||||
void *data, unsigned int offset,
|
||||
unsigned long len);
|
||||
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
|
||||
gpa_t gpa, unsigned long len);
|
||||
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
|
||||
@ -753,7 +755,9 @@ int kvm_get_dirty_log(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log, int *is_dirty);
|
||||
|
||||
int kvm_get_dirty_log_protect(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log, bool *is_dirty);
|
||||
struct kvm_dirty_log *log, bool *flush);
|
||||
int kvm_clear_dirty_log_protect(struct kvm *kvm,
|
||||
struct kvm_clear_dirty_log *log, bool *flush);
|
||||
|
||||
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
@ -762,9 +766,13 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
|
||||
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
|
||||
struct kvm_dirty_log *log);
|
||||
int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
|
||||
struct kvm_clear_dirty_log *log);
|
||||
|
||||
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
|
||||
bool line_status);
|
||||
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
struct kvm_enable_cap *cap);
|
||||
long kvm_arch_vm_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg);
|
||||
|
||||
|
@@ -492,6 +492,17 @@ struct kvm_dirty_log {
	};
};

/* for KVM_CLEAR_DIRTY_LOG */
struct kvm_clear_dirty_log {
	__u32 slot;
	__u32 num_pages;
	__u64 first_page;
	union {
		void __user *dirty_bitmap; /* one bit per page */
		__u64 padding2;
	};
};

/* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask {
	__u32 len;
@@ -975,6 +986,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
#define KVM_CAP_EXCEPTION_PAYLOAD 164
#define KVM_CAP_ARM_VM_IPA_SIZE 165
#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
#define KVM_CAP_HYPERV_CPUID 167

#ifdef KVM_CAP_IRQ_ROUTING

@@ -1421,6 +1434,12 @@ struct kvm_enc_region {
#define KVM_GET_NESTED_STATE	_IOWR(KVMIO, 0xbe, struct kvm_nested_state)
#define KVM_SET_NESTED_STATE	_IOW(KVMIO,  0xbf, struct kvm_nested_state)

/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */
#define KVM_CLEAR_DIRTY_LOG	_IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)

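Together with KVM_GET_DIRTY_LOG, the new ioctl lets userspace clear (and re-write-protect) dirty state only for the pages it has actually processed. A rough userspace sketch, assuming the VM has already enabled KVM_CAP_MANUAL_DIRTY_LOG_PROTECT through KVM_ENABLE_CAP and that the bitmap came from an earlier KVM_GET_DIRTY_LOG call; the function name and parameters are illustrative, not part of the patch:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Clear dirty state for num_pages pages of memslot 'slot', starting at
 * first_page.  The bitmap (one bit per page) selects which pages in that
 * range are actually cleared. */
static int example_clear_dirty(int vm_fd, __u32 slot, __u64 first_page,
			       __u32 num_pages, void *bitmap)
{
	struct kvm_clear_dirty_log clear = {
		.slot = slot,
		.num_pages = num_pages,
		.first_page = first_page,
		.dirty_bitmap = bitmap,
	};

	return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
}

The selftest further down rounds num_pages up to a multiple of 64, matching the bitmap word granularity, which is a safe convention for other callers of this ioctl as well.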
||||
|

/* Available with KVM_CAP_HYPERV_CPUID */
#define KVM_GET_SUPPORTED_HV_CPUID	_IOWR(KVMIO, 0xc1, struct kvm_cpuid2)

/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
	/* Guest initialization commands */

@ -1,4 +1,4 @@
|
||||
#!/usr/bin/python
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# top-like utility for displaying kvm statistics
|
||||
#
|
||||
|
@ -6,7 +6,7 @@ TEST_PROGS := run.sh
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
all: khdr
|
||||
all:
|
||||
@for DIR in $(SUBDIRS); do \
|
||||
BUILD_TARGET=$(OUTPUT)/$$DIR; \
|
||||
mkdir $$BUILD_TARGET -p; \
|
||||
|
@ -19,6 +19,7 @@ TEST_GEN_FILES := \
|
||||
TEST_PROGS := run.sh
|
||||
|
||||
top_srcdir = ../../../../..
|
||||
KSFT_KHDR_INSTALL := 1
|
||||
include ../../lib.mk
|
||||
|
||||
$(TEST_GEN_FILES): $(HEADERS)
|
||||
|
@ -10,8 +10,6 @@ TEST_PROGS_EXTENDED := gpio-mockup-chardev
|
||||
GPIODIR := $(realpath ../../../gpio)
|
||||
GPIOOBJ := gpio-utils.o
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
all: $(TEST_PROGS_EXTENDED)
|
||||
|
||||
override define CLEAN
|
||||
@ -19,7 +17,9 @@ override define CLEAN
|
||||
$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
|
||||
endef
|
||||
|
||||
$(TEST_PROGS_EXTENDED):| khdr
|
||||
KSFT_KHDR_INSTALL := 1
|
||||
include ../lib.mk
|
||||
|
||||
$(TEST_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
|
||||
|
||||
$(GPIODIR)/$(GPIOOBJ):
|
||||
|
@ -1,6 +1,7 @@
|
||||
all:
|
||||
|
||||
top_srcdir = ../../../..
|
||||
KSFT_KHDR_INSTALL := 1
|
||||
UNAME_M := $(shell uname -m)
|
||||
|
||||
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
|
||||
@ -14,9 +15,12 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/state_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
|
||||
TEST_GEN_PROGS_x86_64 += dirty_log_test
|
||||
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
|
||||
|
||||
TEST_GEN_PROGS_aarch64 += dirty_log_test
|
||||
TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
|
||||
|
||||
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
|
||||
LIBKVM += $(LIBKVM_$(UNAME_M))
|
||||
@ -44,7 +48,6 @@ $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
|
||||
|
||||
all: $(STATIC_LIBS)
|
||||
$(TEST_GEN_PROGS): $(STATIC_LIBS)
|
||||
$(STATIC_LIBS):| khdr
|
||||
|
||||
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
|
||||
cscope:
|
||||
|
tools/testing/selftests/kvm/clear_dirty_log_test.c (new file, 2 lines)
@@ -0,0 +1,2 @@
#define USE_CLEAR_DIRTY_LOG
#include "dirty_log_test.c"

@ -51,10 +51,17 @@ static uint64_t random_array[TEST_PAGES_PER_LOOP];
|
||||
static uint64_t iteration;
|
||||
|
||||
/*
|
||||
* GPA offset of the testing memory slot. Must be bigger than
|
||||
* DEFAULT_GUEST_PHY_PAGES.
|
||||
* Guest physical memory offset of the testing memory slot.
|
||||
* This will be set to the topmost valid physical address minus
|
||||
* the test memory size.
|
||||
*/
|
||||
static uint64_t guest_test_mem = DEFAULT_GUEST_TEST_MEM;
|
||||
static uint64_t guest_test_phys_mem;
|
||||
|
||||
/*
|
||||
* Guest virtual memory offset of the testing memory slot.
|
||||
* Must not conflict with identity mapped test code.
|
||||
*/
|
||||
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
|
||||
|
||||
/*
|
||||
* Continuously write to the first 8 bytes of a random pages within
|
||||
@ -66,7 +73,7 @@ static void guest_code(void)
|
||||
|
||||
while (true) {
|
||||
for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
|
||||
uint64_t addr = guest_test_mem;
|
||||
uint64_t addr = guest_test_virt_mem;
|
||||
addr += (READ_ONCE(random_array[i]) % guest_num_pages)
|
||||
* guest_page_size;
|
||||
addr &= ~(host_page_size - 1);
|
||||
@ -209,12 +216,14 @@ static void vm_dirty_log_verify(unsigned long *bmap)
|
||||
}
|
||||
|
||||
static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
|
||||
uint64_t extra_mem_pages, void *guest_code)
|
||||
uint64_t extra_mem_pages, void *guest_code,
|
||||
unsigned long type)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
|
||||
|
||||
vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
|
||||
vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
|
||||
O_RDWR, type);
|
||||
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
|
||||
#ifdef __x86_64__
|
||||
vm_create_irqchip(vm);
|
||||
@ -224,13 +233,14 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
|
||||
}
|
||||
|
||||
static void run_test(enum vm_guest_mode mode, unsigned long iterations,
|
||||
unsigned long interval, bool top_offset)
|
||||
unsigned long interval, uint64_t phys_offset)
|
||||
{
|
||||
unsigned int guest_pa_bits, guest_page_shift;
|
||||
pthread_t vcpu_thread;
|
||||
struct kvm_vm *vm;
|
||||
uint64_t max_gfn;
|
||||
unsigned long *bmap;
|
||||
unsigned long type = 0;
|
||||
|
||||
switch (mode) {
|
||||
case VM_MODE_P52V48_4K:
|
||||
@ -241,6 +251,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
|
||||
guest_pa_bits = 52;
|
||||
guest_page_shift = 16;
|
||||
break;
|
||||
case VM_MODE_P48V48_4K:
|
||||
guest_pa_bits = 48;
|
||||
guest_page_shift = 12;
|
||||
break;
|
||||
case VM_MODE_P48V48_64K:
|
||||
guest_pa_bits = 48;
|
||||
guest_page_shift = 16;
|
||||
break;
|
||||
case VM_MODE_P40V48_4K:
|
||||
guest_pa_bits = 40;
|
||||
guest_page_shift = 12;
|
||||
@ -255,6 +273,19 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
|
||||
|
||||
DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
|
||||
|
||||
#ifdef __x86_64__
|
||||
/*
|
||||
* FIXME
|
||||
* The x86_64 kvm selftests framework currently only supports a
|
||||
* single PML4 which restricts the number of physical address
|
||||
* bits we can change to 39.
|
||||
*/
|
||||
guest_pa_bits = 39;
|
||||
#endif
|
||||
#ifdef __aarch64__
|
||||
if (guest_pa_bits != 40)
|
||||
type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits);
|
||||
#endif
|
||||
max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
|
||||
guest_page_size = (1ul << guest_page_shift);
|
||||
/* 1G of guest page sized pages */
|
||||
@ -263,31 +294,41 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
|
||||
host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
|
||||
!!((guest_num_pages * guest_page_size) % host_page_size);
|
||||
|
||||
if (top_offset) {
|
||||
guest_test_mem = (max_gfn - guest_num_pages) * guest_page_size;
|
||||
guest_test_mem &= ~(host_page_size - 1);
|
||||
if (!phys_offset) {
|
||||
guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size;
|
||||
guest_test_phys_mem &= ~(host_page_size - 1);
|
||||
} else {
|
||||
guest_test_phys_mem = phys_offset;
|
||||
}
|
||||
|
||||
DEBUG("guest test mem offset: 0x%lx\n", guest_test_mem);
|
||||
DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
|
||||
|
||||
bmap = bitmap_alloc(host_num_pages);
|
||||
host_bmap_track = bitmap_alloc(host_num_pages);
|
||||
|
||||
vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
|
||||
vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type);
|
||||
|
||||
#ifdef USE_CLEAR_DIRTY_LOG
|
||||
struct kvm_enable_cap cap = {};
|
||||
|
||||
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT;
|
||||
cap.args[0] = 1;
|
||||
vm_enable_cap(vm, &cap);
|
||||
#endif
|
||||
|
||||
/* Add an extra memory slot for testing dirty logging */
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
|
||||
guest_test_mem,
|
||||
guest_test_phys_mem,
|
||||
TEST_MEM_SLOT_INDEX,
|
||||
guest_num_pages,
|
||||
KVM_MEM_LOG_DIRTY_PAGES);
|
||||
|
||||
/* Do 1:1 mapping for the dirty track memory slot */
|
||||
virt_map(vm, guest_test_mem, guest_test_mem,
|
||||
/* Do mapping for the dirty track memory slot */
|
||||
virt_map(vm, guest_test_virt_mem, guest_test_phys_mem,
|
||||
guest_num_pages * guest_page_size, 0);
|
||||
|
||||
/* Cache the HVA pointer of the region */
|
||||
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_mem);
|
||||
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
|
||||
|
||||
#ifdef __x86_64__
|
||||
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
|
||||
@@ -299,7 +340,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	/* Export the shared variables to the guest */
 	sync_global_to_guest(vm, host_page_size);
 	sync_global_to_guest(vm, guest_page_size);
-	sync_global_to_guest(vm, guest_test_mem);
+	sync_global_to_guest(vm, guest_test_virt_mem);
 	sync_global_to_guest(vm, guest_num_pages);
 
 	/* Start the iterations */
@@ -316,6 +357,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		/* Give the vcpu thread some time to dirty some pages */
 		usleep(interval * 1000);
 		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+#ifdef USE_CLEAR_DIRTY_LOG
+		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+				       DIV_ROUND_UP(host_num_pages, 64) * 64);
+#endif
 		vm_dirty_log_verify(bmap);
 		iteration++;
 		sync_global_to_guest(vm, iteration);
@@ -335,23 +380,16 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	kvm_vm_free(vm);
 }
 
-static struct vm_guest_modes {
-	enum vm_guest_mode mode;
+struct vm_guest_mode_params {
 	bool supported;
 	bool enabled;
-} vm_guest_modes[NUM_VM_MODES] = {
-#if defined(__x86_64__)
-	{ VM_MODE_P52V48_4K, 1, 1, },
-	{ VM_MODE_P52V48_64K, 0, 0, },
-	{ VM_MODE_P40V48_4K, 0, 0, },
-	{ VM_MODE_P40V48_64K, 0, 0, },
-#elif defined(__aarch64__)
-	{ VM_MODE_P52V48_4K, 0, 0, },
-	{ VM_MODE_P52V48_64K, 0, 0, },
-	{ VM_MODE_P40V48_4K, 1, 1, },
-	{ VM_MODE_P40V48_64K, 1, 1, },
-#endif
 };
+struct vm_guest_mode_params vm_guest_mode_params[NUM_VM_MODES];
+
+#define vm_guest_mode_params_init(mode, supported, enabled)	\
+({								\
+	vm_guest_mode_params[mode] = (struct vm_guest_mode_params){ supported, enabled }; \
+})
 
 static void help(char *name)
 {
@@ -359,25 +397,21 @@ static void help(char *name)
 
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-I interval] "
-	       "[-o offset] [-t] [-m mode]\n", name);
+	       "[-p offset] [-m mode]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
 	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
 	       TEST_HOST_LOOP_INTERVAL);
-	printf(" -o: guest test memory offset (default: 0x%lx)\n",
-	       DEFAULT_GUEST_TEST_MEM);
-	printf(" -t: map guest test memory at the top of the allowed "
-	       "physical address range\n");
+	printf(" -p: specify guest physical test memory offset\n"
+	       "     Warning: a low offset can conflict with the loaded test code.\n");
 	printf(" -m: specify the guest mode ID to test "
 	       "(default: test all supported modes)\n"
 	       "     This option may be used multiple times.\n"
 	       "     Guest mode IDs:\n");
 	for (i = 0; i < NUM_VM_MODES; ++i) {
-		printf(" %d: %s%s\n",
-		       vm_guest_modes[i].mode,
-		       vm_guest_mode_string(vm_guest_modes[i].mode),
-		       vm_guest_modes[i].supported ? " (supported)" : "");
+		printf(" %d: %s%s\n", i, vm_guest_mode_string(i),
+		       vm_guest_mode_params[i].supported ? " (supported)" : "");
 	}
 	puts("");
 	exit(0);
@@ -388,11 +422,34 @@ int main(int argc, char *argv[])
 	unsigned long iterations = TEST_HOST_LOOP_N;
 	unsigned long interval = TEST_HOST_LOOP_INTERVAL;
 	bool mode_selected = false;
-	bool top_offset = false;
-	unsigned int mode;
+	uint64_t phys_offset = 0;
+	unsigned int mode, host_ipa_limit;
 	int opt, i;
 
-	while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
+#ifdef USE_CLEAR_DIRTY_LOG
+	if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) {
+		fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n");
+		exit(KSFT_SKIP);
+	}
+#endif
+
+#ifdef __x86_64__
+	vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true);
+#endif
+#ifdef __aarch64__
+	vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
+	vm_guest_mode_params_init(VM_MODE_P40V48_64K, true, true);
+
+	host_ipa_limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+	if (host_ipa_limit >= 52)
+		vm_guest_mode_params_init(VM_MODE_P52V48_64K, true, true);
+	if (host_ipa_limit >= 48) {
+		vm_guest_mode_params_init(VM_MODE_P48V48_4K, true, true);
+		vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true);
+	}
+#endif
+
+	while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
 		switch (opt) {
 		case 'i':
 			iterations = strtol(optarg, NULL, 10);
@@ -400,22 +457,19 @@ int main(int argc, char *argv[])
 		case 'I':
 			interval = strtol(optarg, NULL, 10);
 			break;
-		case 'o':
-			guest_test_mem = strtoull(optarg, NULL, 0);
-			break;
-		case 't':
-			top_offset = true;
+		case 'p':
+			phys_offset = strtoull(optarg, NULL, 0);
 			break;
 		case 'm':
 			if (!mode_selected) {
 				for (i = 0; i < NUM_VM_MODES; ++i)
-					vm_guest_modes[i].enabled = 0;
+					vm_guest_mode_params[i].enabled = false;
 				mode_selected = true;
 			}
 			mode = strtoul(optarg, NULL, 10);
 			TEST_ASSERT(mode < NUM_VM_MODES,
 				    "Guest mode ID %d too big", mode);
-			vm_guest_modes[mode].enabled = 1;
+			vm_guest_mode_params[mode].enabled = true;
 			break;
 		case 'h':
 		default:
@@ -426,8 +480,6 @@ int main(int argc, char *argv[])
 
 	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
 	TEST_ASSERT(interval > 0, "Interval must be greater than zero");
-	TEST_ASSERT(!top_offset || guest_test_mem == DEFAULT_GUEST_TEST_MEM,
-		    "Cannot use both -o [offset] and -t at the same time");
 
 	DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
 	      iterations, interval);
@@ -435,13 +487,12 @@ int main(int argc, char *argv[])
 	srandom(time(0));
 
 	for (i = 0; i < NUM_VM_MODES; ++i) {
-		if (!vm_guest_modes[i].enabled)
+		if (!vm_guest_mode_params[i].enabled)
 			continue;
-		TEST_ASSERT(vm_guest_modes[i].supported,
+		TEST_ASSERT(vm_guest_mode_params[i].supported,
 			    "Guest mode ID %d (%s) not supported.",
-			    vm_guest_modes[i].mode,
-			    vm_guest_mode_string(vm_guest_modes[i].mode));
-		run_test(vm_guest_modes[i].mode, iterations, interval, top_offset);
+			    i, vm_guest_mode_string(i));
+		run_test(i, iterations, interval, phys_offset);
 	}
 
 	return 0;
@@ -36,6 +36,8 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 enum vm_guest_mode {
 	VM_MODE_P52V48_4K,
 	VM_MODE_P52V48_64K,
+	VM_MODE_P48V48_4K,
+	VM_MODE_P48V48_64K,
 	VM_MODE_P40V48_4K,
 	VM_MODE_P40V48_64K,
 	NUM_VM_MODES,
@@ -54,10 +56,14 @@ int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
+			  int perm, unsigned long type);
 void kvm_vm_free(struct kvm_vm *vmp);
 void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+			    uint64_t first_page, uint32_t num_pages);
 
 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
 		       size_t len);
@@ -78,6 +84,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 
 void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
 		void *arg);
+int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+		void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
@@ -268,13 +268,20 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
 
 	switch (vm->mode) {
 	case VM_MODE_P52V48_4K:
-		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
-		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
-		break;
+		TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
+				   "with 52-bit physical address ranges");
 	case VM_MODE_P52V48_64K:
 		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
 		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
 		break;
+	case VM_MODE_P48V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+		break;
+	case VM_MODE_P48V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+		break;
 	case VM_MODE_P40V48_4K:
 		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
 		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
@@ -305,7 +312,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
 	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
 	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
 
-	fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n",
-		indent, "", pstate, pc);
-
+	fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n",
+		indent, "", pstate, pc);
 }
@@ -85,13 +85,13 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
 	return ret;
 }
 
-static void vm_open(struct kvm_vm *vm, int perm)
+static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
 {
 	vm->kvm_fd = open(KVM_DEV_PATH, perm);
 	if (vm->kvm_fd < 0)
 		exit(KSFT_SKIP);
 
-	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
+	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
 	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
 		"rc: %i errno: %i", vm->fd, errno);
 }
@@ -99,9 +99,13 @@ static void vm_open(struct kvm_vm *vm, int perm)
 const char * const vm_guest_mode_string[] = {
 	"PA-bits:52, VA-bits:48, 4K pages",
 	"PA-bits:52, VA-bits:48, 64K pages",
+	"PA-bits:48, VA-bits:48, 4K pages",
+	"PA-bits:48, VA-bits:48, 64K pages",
 	"PA-bits:40, VA-bits:48, 4K pages",
 	"PA-bits:40, VA-bits:48, 64K pages",
 };
+_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
+	       "Missing new mode strings?");
 
 /*
  * VM Create
@@ -122,7 +126,8 @@ const char * const vm_guest_mode_string[] = {
  * descriptor to control the created VM is created with the permissions
  * given by perm (e.g. O_RDWR).
  */
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
+			  int perm, unsigned long type)
 {
 	struct kvm_vm *vm;
 	int kvm_fd;
@@ -131,22 +136,38 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
 	vm->mode = mode;
-	vm_open(vm, perm);
+	vm->type = type;
+	vm_open(vm, perm, type);
 
 	/* Setup mode specific traits. */
 	switch (vm->mode) {
 	case VM_MODE_P52V48_4K:
 		vm->pgtable_levels = 4;
 		vm->pa_bits = 52;
+		vm->va_bits = 48;
 		vm->page_size = 0x1000;
 		vm->page_shift = 12;
-		vm->va_bits = 48;
 		break;
 	case VM_MODE_P52V48_64K:
 		vm->pgtable_levels = 3;
 		vm->pa_bits = 52;
 		vm->va_bits = 48;
 		vm->page_size = 0x10000;
 		vm->page_shift = 16;
 		break;
+	case VM_MODE_P48V48_4K:
+		vm->pgtable_levels = 4;
+		vm->pa_bits = 48;
+		vm->va_bits = 48;
+		vm->page_size = 0x1000;
+		vm->page_shift = 12;
+		break;
+	case VM_MODE_P48V48_64K:
+		vm->pgtable_levels = 3;
+		vm->pa_bits = 48;
+		vm->va_bits = 48;
+		vm->page_size = 0x10000;
+		vm->page_shift = 16;
+		break;
 	case VM_MODE_P40V48_4K:
 		vm->pgtable_levels = 4;
@@ -186,6 +207,11 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+{
+	return _vm_create(mode, phy_pages, perm, 0);
+}
+
 /*
  * VM Restart
  *
@@ -203,7 +229,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 {
 	struct userspace_mem_region *region;
 
-	vm_open(vmp, perm);
+	vm_open(vmp, perm, vmp->type);
 	if (vmp->has_irqchip)
 		vm_create_irqchip(vmp);
 
@@ -231,6 +257,19 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
 		    strerror(-ret));
 }
 
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+			    uint64_t first_page, uint32_t num_pages)
+{
+	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
+					    .first_page = first_page,
+					    .num_pages = num_pages };
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
+	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
+		    strerror(-ret));
+}
+
 /*
  * Userspace Memory Region Find
  *
@@ -1269,6 +1308,16 @@ int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
  */
 void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
 		unsigned long cmd, void *arg)
 {
+	int ret;
+
+	ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
+	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
+		unsigned long cmd, void *arg)
+{
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
@@ -1276,8 +1325,8 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
 	ret = ioctl(vcpu->fd, cmd, arg);
-	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
-		cmd, ret, errno, strerror(errno));
+
+	return ret;
 }
 
 /*
@@ -1422,7 +1471,7 @@ const char *exit_reason_str(unsigned int exit_reason)
  *
  * Within the VM specified by vm, locates a range of available physical
  * pages at or above paddr_min. If found, the pages are marked as in use
- * and thier base address is returned. A TEST_ASSERT failure occurs if
+ * and their base address is returned. A TEST_ASSERT failure occurs if
  * not enough pages are available at or above paddr_min.
  */
 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
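
For readers unfamiliar with the interface the selftest above exercises, the following is a minimal sketch (not part of this commit) of the manual dirty-log-protect flow: enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT on the VM file descriptor, fetch the dirty bitmap with KVM_GET_DIRTY_LOG, then re-protect the reported pages with KVM_CLEAR_DIRTY_LOG. The function name and the vm_fd/slot/num_pages parameters are hypothetical placeholders assumed to come from an already configured VM; num_pages is rounded up to a multiple of 64 the same way the test does.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>   /* needs headers new enough to define KVM_CLEAR_DIRTY_LOG */

static void fetch_and_clear_dirty_log(int vm_fd, uint32_t slot, uint32_t num_pages)
{
	/* KVM_CLEAR_DIRTY_LOG operates on ranges that are multiples of 64 pages. */
	uint32_t clear_pages = ((num_pages + 63) / 64) * 64;
	uint64_t *bitmap = calloc(clear_pages / 64, sizeof(uint64_t));
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT,
		.args = { 1 },
	};
	struct kvm_dirty_log get = { .slot = slot, .dirty_bitmap = bitmap };
	struct kvm_clear_dirty_log clear = {
		.slot = slot,
		.first_page = 0,
		.num_pages = clear_pages,
		.dirty_bitmap = bitmap,
	};

	assert(bitmap);
	/* Enable manual protection on the VM fd (error handling kept to asserts). */
	assert(ioctl(vm_fd, KVM_ENABLE_CAP, &cap) == 0);
	/* Snapshot which guest pages were dirtied since the last clear. */
	assert(ioctl(vm_fd, KVM_GET_DIRTY_LOG, &get) == 0);
	/* ...consume the bitmap, then re-arm dirty tracking for those pages... */
	assert(ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear) == 0);
	free(bitmap);
}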