- fix lock initialization race in gfn-to-pfn cache (+selftests)
 
 - fix two refcounting errors
 
 - emulator fixes
 
 - mask off reserved bits in CPUID
 
 - fix bug with disabling SGX
 
 RISC-V:
 
 - update MAINTAINERS
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmNcYawUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroPMzwf/Uh1lO2Op6IJ3/no2gPfShc9bdqgM
 LiCkzfJp9PQAuDl/hs44CiQHUlPEfeIsI/ns0euNj37TlnB3zKmm46mtiWhEefIH
 rwcm/ngKgw3283pZEf8FeMTDfNexOaBg2ZNoODR7JQsU50tbToY4TNE2nNRgbdL5
 SNmzOwox1rZIQHxEa2r/k2B/HdRbeCFUU82EjwFqaNzH1yhzBXMcokdSCmGCBMsE
 3xfCzQ7uMkXw/rlkkG0be65+5dTNmhfiKQYGAQe4s7PycVPMD79D2EhCfbpvbK7t
 EmgOXStmvtW6+ukqPATHbRVCDwW0VmiQv5IWOGbLB1Qdy5/REynJ5ObC8g==
 =Hvro
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "x86:

   - fix lock initialization race in gfn-to-pfn cache (+selftests)

   - fix two refcounting errors

   - emulator fixes

   - mask off reserved bits in CPUID

   - fix bug with disabling SGX

  RISC-V:

   - update MAINTAINERS"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/xen: Fix eventfd error handling in kvm_xen_eventfd_assign()
  KVM: x86: smm: number of GPRs in the SMRAM image depends on the image format
  KVM: x86: emulator: update the emulation mode after CR0 write
  KVM: x86: emulator: update the emulation mode after rsm
  KVM: x86: emulator: introduce emulator_recalc_and_set_mode
  KVM: x86: emulator: em_sysexit should update ctxt->mode
  KVM: selftests: Mark "guest_saw_irq" as volatile in xen_shinfo_test
  KVM: selftests: Add tests in xen_shinfo_test to detect lock races
  KVM: Reject attempts to consume or refresh inactive gfn_to_pfn_cache
  KVM: Initialize gfn_to_pfn_cache locks in dedicated helper
  KVM: VMX: fully disable SGX if SECONDARY_EXEC_ENCLS_EXITING unavailable
  KVM: x86: Exempt pending triple fault from event injection sanity check
  MAINTAINERS: git://github -> https://github.com for kvm-riscv
  KVM: debugfs: Return retval of simple_attr_open() if it fails
  KVM: x86: Reduce refcount if single_open() fails in kvm_mmu_rmaps_stat_open()
  KVM: x86: Mask off reserved bits in CPUID.8000001FH
  KVM: x86: Mask off reserved bits in CPUID.8000001AH
  KVM: x86: Mask off reserved bits in CPUID.80000008H
  KVM: x86: Mask off reserved bits in CPUID.80000006H
  KVM: x86: Mask off reserved bits in CPUID.80000001H
This commit is contained in:
Linus Torvalds 2022-11-01 12:28:52 -07:00
commit f526d6a822
11 changed files with 363 additions and 104 deletions

View File

@ -11248,7 +11248,7 @@ L: kvm@vger.kernel.org
L: kvm-riscv@lists.infradead.org
L: linux-riscv@lists.infradead.org
S: Maintained
T: git git://github.com/kvm-riscv/linux.git
T: git https://github.com/kvm-riscv/linux.git
F: arch/riscv/include/asm/kvm*
F: arch/riscv/include/uapi/asm/kvm*
F: arch/riscv/kvm/

View File

@ -1133,11 +1133,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = max(entry->eax, 0x80000021);
break;
case 0x80000001:
entry->ebx &= ~GENMASK(27, 16);
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
case 0x80000006:
/* L2 cache and TLB: pass through host info. */
/* Drop reserved bits, pass host L2 cache and TLB info. */
entry->edx &= ~GENMASK(17, 16);
break;
case 0x80000007: /* Advanced power management */
/* invariant TSC is CPUID.80000007H:EDX[8] */
@ -1167,6 +1169,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0008_EBX);
break;
@ -1186,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ecx = entry->edx = 0;
break;
case 0x8000001a:
entry->eax &= GENMASK(2, 0);
entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x8000001e:
break;
case 0x8000001F:
@ -1193,7 +1199,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
} else {
cpuid_entry_override(entry, CPUID_8000_001F_EAX);
/* Clear NumVMPL since KVM does not support VMPL. */
entry->ebx &= ~GENMASK(31, 12);
/*
* Enumerate '0' for "PA bits reduction", the adjusted
* MAXPHYADDR is enumerated directly (see 0x80000008).

View File

@ -158,11 +158,16 @@ out:
static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
{
struct kvm *kvm = inode->i_private;
int r;
if (!kvm_get_kvm_safe(kvm))
return -ENOENT;
return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
if (r < 0)
kvm_put_kvm(kvm);
return r;
}
static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)

View File

@ -791,8 +791,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ctxt->mode, linear);
}
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
enum x86emul_mode mode)
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
{
ulong linear;
int rc;
@ -802,41 +801,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
}
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
{
return assign_eip(ctxt, dst, ctxt->mode);
u64 efer;
struct desc_struct cs;
u16 selector;
u32 base3;
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
/* Real mode. cpu must not have long mode active */
if (efer & EFER_LMA)
return X86EMUL_UNHANDLEABLE;
ctxt->mode = X86EMUL_MODE_REAL;
return X86EMUL_CONTINUE;
}
if (ctxt->eflags & X86_EFLAGS_VM) {
/* Protected/VM86 mode. cpu must not have long mode active */
if (efer & EFER_LMA)
return X86EMUL_UNHANDLEABLE;
ctxt->mode = X86EMUL_MODE_VM86;
return X86EMUL_CONTINUE;
}
if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
return X86EMUL_UNHANDLEABLE;
if (efer & EFER_LMA) {
if (cs.l) {
/* Proper long mode */
ctxt->mode = X86EMUL_MODE_PROT64;
} else if (cs.d) {
/* 32 bit compatibility mode*/
ctxt->mode = X86EMUL_MODE_PROT32;
} else {
ctxt->mode = X86EMUL_MODE_PROT16;
}
} else {
/* Legacy 32 bit / 16 bit mode */
ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
}
return X86EMUL_CONTINUE;
}
static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
const struct desc_struct *cs_desc)
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
enum x86emul_mode mode = ctxt->mode;
int rc;
return assign_eip(ctxt, dst);
}
#ifdef CONFIG_X86_64
if (ctxt->mode >= X86EMUL_MODE_PROT16) {
if (cs_desc->l) {
u64 efer = 0;
static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
{
int rc = emulator_recalc_and_set_mode(ctxt);
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
if (efer & EFER_LMA)
mode = X86EMUL_MODE_PROT64;
} else
mode = X86EMUL_MODE_PROT32; /* temporary value */
}
#endif
if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
rc = assign_eip(ctxt, dst, mode);
if (rc == X86EMUL_CONTINUE)
ctxt->mode = mode;
return rc;
if (rc != X86EMUL_CONTINUE)
return rc;
return assign_eip(ctxt, dst);
}
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@ -2172,7 +2201,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
rc = assign_eip_far(ctxt, ctxt->src.val);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@ -2250,7 +2279,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
&new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;
rc = assign_eip_far(ctxt, eip, &new_desc);
rc = assign_eip_far(ctxt, eip);
/* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@ -2432,7 +2461,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
for (i = 0; i < NR_EMULATOR_GPRS; i++)
for (i = 0; i < 8; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
val = GET_SMSTATE(u32, smstate, 0x7fcc);
@ -2489,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
u16 selector;
int i, r;
for (i = 0; i < NR_EMULATOR_GPRS; i++)
for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
@ -2633,7 +2662,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
* those side effects need to be explicitly handled for both success
* and shutdown.
*/
return X86EMUL_CONTINUE;
return emulator_recalc_and_set_mode(ctxt);
emulate_shutdown:
ctxt->ops->triple_fault(ctxt);
@ -2876,6 +2905,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
ctxt->_eip = rdx;
ctxt->mode = usermode;
*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE;
@ -3469,7 +3499,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
rc = assign_eip_far(ctxt, ctxt->src.val);
if (rc != X86EMUL_CONTINUE)
goto fail;
@ -3611,11 +3641,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
int cr_num = ctxt->modrm_reg;
int r;
if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
return emulate_gp(ctxt, 0);
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
if (cr_num == 0) {
/*
* CR0 write might have updated CR0.PE and/or CR0.PG
* which can affect the cpu's execution mode.
*/
r = emulator_recalc_and_set_mode(ctxt);
if (r != X86EMUL_CONTINUE)
return r;
}
return X86EMUL_CONTINUE;
}

View File

@ -8263,6 +8263,11 @@ static __init int hardware_setup(void)
if (!cpu_has_virtual_nmis())
enable_vnmi = 0;
#ifdef CONFIG_X86_SGX_KVM
if (!cpu_has_vmx_encls_vmexit())
enable_sgx = false;
#endif
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not

View File

@ -2315,11 +2315,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
/* we verify if the enable bit is set... */
if (system_time & 1) {
kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
KVM_HOST_USES_PFN, system_time & ~1ULL,
sizeof(struct pvclock_vcpu_time_info));
kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
KVM_HOST_USES_PFN, system_time & ~1ULL,
sizeof(struct pvclock_vcpu_time_info));
} else {
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
}
return;
@ -3388,7 +3388,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
vcpu->arch.time = 0;
}
@ -10044,7 +10044,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
kvm_x86_ops.nested_ops->has_events(vcpu))
*req_immediate_exit = true;
WARN_ON(kvm_is_exception_pending(vcpu));
/*
* KVM must never queue a new exception while injecting an event; KVM
* is done emulating and should only propagate the to-be-injected event
* to the VMCS/VMCB. Queueing a new exception can put the vCPU into an
* infinite loop as KVM will bail from VM-Enter to inject the pending
* exception and start the cycle all over.
*
* Exempt triple faults as they have special handling and won't put the
* vCPU into an infinite loop. Triple fault can be queued when running
* VMX without unrestricted guest, as that requires KVM to emulate Real
* Mode events (see kvm_inject_realmode_interrupt()).
*/
WARN_ON_ONCE(vcpu->arch.exception.pending ||
vcpu->arch.exception_vmexit.pending);
return 0;
out:
@ -11816,6 +11829,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.regs_avail = ~0;
vcpu->arch.regs_dirty = ~0;
kvm_gpc_init(&vcpu->arch.pv_time);
if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else

View File

@ -42,13 +42,13 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
int idx = srcu_read_lock(&kvm->srcu);
if (gfn == GPA_INVALID) {
kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
kvm_gpc_deactivate(kvm, gpc);
goto out;
}
do {
ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
gpa, PAGE_SIZE);
ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
PAGE_SIZE);
if (ret)
goto out;
@ -554,15 +554,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
r = 0;
break;
}
r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_info_cache,
NULL, KVM_HOST_USES_PFN, data->u.gpa,
sizeof(struct vcpu_info));
r = kvm_gpc_activate(vcpu->kvm,
&vcpu->arch.xen.vcpu_info_cache, NULL,
KVM_HOST_USES_PFN, data->u.gpa,
sizeof(struct vcpu_info));
if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@ -570,16 +570,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
if (data->u.gpa == GPA_INVALID) {
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache);
kvm_gpc_deactivate(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache);
r = 0;
break;
}
r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache,
NULL, KVM_HOST_USES_PFN, data->u.gpa,
sizeof(struct pvclock_vcpu_time_info));
r = kvm_gpc_activate(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache,
NULL, KVM_HOST_USES_PFN, data->u.gpa,
sizeof(struct pvclock_vcpu_time_info));
if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
break;
@ -590,16 +590,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
if (data->u.gpa == GPA_INVALID) {
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
&vcpu->arch.xen.runstate_cache);
kvm_gpc_deactivate(vcpu->kvm,
&vcpu->arch.xen.runstate_cache);
r = 0;
break;
}
r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
&vcpu->arch.xen.runstate_cache,
NULL, KVM_HOST_USES_PFN, data->u.gpa,
sizeof(struct vcpu_runstate_info));
r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
NULL, KVM_HOST_USES_PFN, data->u.gpa,
sizeof(struct vcpu_runstate_info));
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
@ -1667,18 +1666,18 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
case EVTCHNSTAT_ipi:
/* IPI must map back to the same port# */
if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
goto out; /* -EINVAL */
goto out_noeventfd; /* -EINVAL */
break;
case EVTCHNSTAT_interdomain:
if (data->u.evtchn.deliver.port.port) {
if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
goto out; /* -EINVAL */
goto out_noeventfd; /* -EINVAL */
} else {
eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
if (IS_ERR(eventfd)) {
ret = PTR_ERR(eventfd);
goto out;
goto out_noeventfd;
}
}
break;
@ -1718,6 +1717,7 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
out:
if (eventfd)
eventfd_ctx_put(eventfd);
out_noeventfd:
kfree(evtchnfd);
return ret;
}
@ -1816,7 +1816,12 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
{
vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
vcpu->arch.xen.poll_evtchn = 0;
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
}
void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@ -1824,18 +1829,17 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
if (kvm_xen_timer_enabled(vcpu))
kvm_xen_stop_timer(vcpu);
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
&vcpu->arch.xen.runstate_cache);
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
&vcpu->arch.xen.vcpu_info_cache);
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
void kvm_xen_init_vm(struct kvm *kvm)
{
idr_init(&kvm->arch.xen.evtchn_ports);
kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
}
void kvm_xen_destroy_vm(struct kvm *kvm)
@ -1843,7 +1847,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
struct evtchnfd *evtchnfd;
int i;
kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);
idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
if (!evtchnfd->deliver.port.port)

View File

@ -1240,8 +1240,18 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
/**
* kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a
* given guest physical address.
* kvm_gpc_init - initialize gfn_to_pfn_cache.
*
* @gpc: struct gfn_to_pfn_cache object.
*
* This sets up a gfn_to_pfn_cache by initializing locks. Note, the cache must
* be zero-allocated (or zeroed by the caller before init).
*/
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
/**
* kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
* physical address.
*
* @kvm: pointer to kvm instance.
* @gpc: struct gfn_to_pfn_cache object.
@ -1265,9 +1275,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
* kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
* accessing the target page.
*/
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
gpa_t gpa, unsigned long len);
int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
gpa_t gpa, unsigned long len);
/**
* kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
@ -1324,7 +1334,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
/**
* kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache.
* kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
*
* @kvm: pointer to kvm instance.
* @gpc: struct gfn_to_pfn_cache object.
@ -1332,7 +1342,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
* This removes a cache from the @kvm's list to be processed on MMU notifier
* invocation.
*/
void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
void kvm_sigset_activate(struct kvm_vcpu *vcpu);
void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);

View File

@ -15,9 +15,13 @@
#include <time.h>
#include <sched.h>
#include <signal.h>
#include <pthread.h>
#include <sys/eventfd.h>
/* Defined in include/linux/kvm_types.h */
#define GPA_INVALID (~(ulong)0)
#define SHINFO_REGION_GVA 0xc0000000ULL
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10
@ -44,6 +48,8 @@
#define MIN_STEAL_TIME 50000
#define SHINFO_RACE_TIMEOUT 2 /* seconds */
#define __HYPERVISOR_set_timer_op 15
#define __HYPERVISOR_sched_op 29
#define __HYPERVISOR_event_channel_op 32
@ -126,7 +132,7 @@ struct {
struct kvm_irq_routing_entry entries[2];
} irq_routes;
bool guest_saw_irq;
static volatile bool guest_saw_irq;
static void evtchn_handler(struct ex_regs *regs)
{
@ -148,6 +154,7 @@ static void guest_wait_for_irq(void)
static void guest_code(void)
{
struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
int i;
__asm__ __volatile__(
"sti\n"
@ -325,6 +332,49 @@ static void guest_code(void)
guest_wait_for_irq();
GUEST_SYNC(21);
/* Racing host ioctls */
guest_wait_for_irq();
GUEST_SYNC(22);
/* Racing vmcall against host ioctl */
ports[0] = 0;
p = (struct sched_poll) {
.ports = ports,
.nr_ports = 1,
.timeout = 0
};
wait_for_timer:
/*
* Poll for a timer wake event while the worker thread is mucking with
* the shared info. KVM XEN drops timer IRQs if the shared info is
* invalid when the timer expires. Arbitrarily poll 100 times before
* giving up and asking the VMM to re-arm the timer. 100 polls should
* consume enough time to beat on KVM without taking too long if the
* timer IRQ is dropped due to an invalid event channel.
*/
for (i = 0; i < 100 && !guest_saw_irq; i++)
asm volatile("vmcall"
: "=a" (rax)
: "a" (__HYPERVISOR_sched_op),
"D" (SCHEDOP_poll),
"S" (&p)
: "memory");
/*
* Re-send the timer IRQ if it was (likely) dropped due to the timer
* expiring while the event channel was invalid.
*/
if (!guest_saw_irq) {
GUEST_SYNC(23);
goto wait_for_timer;
}
guest_saw_irq = false;
GUEST_SYNC(24);
}
static int cmp_timespec(struct timespec *a, struct timespec *b)
@ -352,11 +402,36 @@ static void handle_alrm(int sig)
TEST_FAIL("IRQ delivery timed out");
}
static void *juggle_shinfo_state(void *arg)
{
struct kvm_vm *vm = (struct kvm_vm *)arg;
struct kvm_xen_hvm_attr cache_init = {
.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
};
struct kvm_xen_hvm_attr cache_destroy = {
.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
.u.shared_info.gfn = GPA_INVALID
};
for (;;) {
__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_init);
__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_destroy);
pthread_testcancel();
};
return NULL;
}
int main(int argc, char *argv[])
{
struct timespec min_ts, max_ts, vm_ts;
struct kvm_vm *vm;
pthread_t thread;
bool verbose;
int ret;
verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
!strncmp(argv[1], "--verbose", 10));
@ -785,6 +860,71 @@ int main(int argc, char *argv[])
case 21:
TEST_ASSERT(!evtchn_irq_expected,
"Expected event channel IRQ but it didn't happen");
alarm(0);
if (verbose)
printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
struct kvm_irq_routing_xen_evtchn uxe = {
.port = 1,
.vcpu = vcpu->id,
.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
};
evtchn_irq_expected = true;
for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
__vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
break;
case 22:
TEST_ASSERT(!evtchn_irq_expected,
"Expected event channel IRQ but it didn't happen");
if (verbose)
printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
shinfo->evtchn_pending[0] = 1;
evtchn_irq_expected = true;
tmr.u.timer.expires_ns = rs->state_entry_time +
SHINFO_RACE_TIMEOUT * 1000000000ULL;
vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
break;
case 23:
/*
* Optional and possibly repeated sync point.
* Injecting the timer IRQ may fail if the
* shinfo is invalid when the timer expires.
* If the timer has expired but the IRQ hasn't
* been delivered, rearm the timer and retry.
*/
vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
/* Resume the guest if the timer is still pending. */
if (tmr.u.timer.expires_ns)
break;
/* All done if the IRQ was delivered. */
if (!evtchn_irq_expected)
break;
tmr.u.timer.expires_ns = rs->state_entry_time +
SHINFO_RACE_TIMEOUT * 1000000000ULL;
vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
break;
case 24:
TEST_ASSERT(!evtchn_irq_expected,
"Expected event channel IRQ but it didn't happen");
ret = pthread_cancel(thread);
TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
ret = pthread_join(thread, 0);
TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
goto done;
case 0x20:

View File

@ -5409,6 +5409,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
int (*get)(void *, u64 *), int (*set)(void *, u64),
const char *fmt)
{
int ret;
struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
inode->i_private;
@ -5420,15 +5421,13 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
if (!kvm_get_kvm_safe(stat_data->kvm))
return -ENOENT;
if (simple_attr_open(inode, file, get,
kvm_stats_debugfs_mode(stat_data->desc) & 0222
? set : NULL,
fmt)) {
ret = simple_attr_open(inode, file, get,
kvm_stats_debugfs_mode(stat_data->desc) & 0222
? set : NULL, fmt);
if (ret)
kvm_put_kvm(stat_data->kvm);
return -ENOMEM;
}
return 0;
return ret;
}
static int kvm_debugfs_release(struct inode *inode, struct file *file)

View File

@ -81,6 +81,9 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
{
struct kvm_memslots *slots = kvm_memslots(kvm);
if (!gpc->active)
return false;
if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
return false;
@ -240,10 +243,11 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
{
struct kvm_memslots *slots = kvm_memslots(kvm);
unsigned long page_offset = gpa & ~PAGE_MASK;
kvm_pfn_t old_pfn, new_pfn;
bool unmap_old = false;
unsigned long old_uhva;
kvm_pfn_t old_pfn;
void *old_khva;
int ret = 0;
int ret;
/*
* If must fit within a single page. The 'len' argument is
@ -261,6 +265,11 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
write_lock_irq(&gpc->lock);
if (!gpc->active) {
ret = -EINVAL;
goto out_unlock;
}
old_pfn = gpc->pfn;
old_khva = gpc->khva - offset_in_page(gpc->khva);
old_uhva = gpc->uhva;
@ -291,6 +300,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
/* If the HVA→PFN mapping was already valid, don't unmap it. */
old_pfn = KVM_PFN_ERR_FAULT;
old_khva = NULL;
ret = 0;
}
out:
@ -305,14 +315,15 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
gpc->khva = NULL;
}
/* Snapshot the new pfn before dropping the lock! */
new_pfn = gpc->pfn;
/* Detect a pfn change before dropping the lock! */
unmap_old = (old_pfn != gpc->pfn);
out_unlock:
write_unlock_irq(&gpc->lock);
mutex_unlock(&gpc->refresh_lock);
if (old_pfn != new_pfn)
if (unmap_old)
gpc_unmap_khva(kvm, old_pfn, old_khva);
return ret;
@ -346,42 +357,61 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc)
{
rwlock_init(&gpc->lock);
mutex_init(&gpc->refresh_lock);
}
EXPORT_SYMBOL_GPL(kvm_gpc_init);
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
gpa_t gpa, unsigned long len)
int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
gpa_t gpa, unsigned long len)
{
WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
if (!gpc->active) {
rwlock_init(&gpc->lock);
mutex_init(&gpc->refresh_lock);
gpc->khva = NULL;
gpc->pfn = KVM_PFN_ERR_FAULT;
gpc->uhva = KVM_HVA_ERR_BAD;
gpc->vcpu = vcpu;
gpc->usage = usage;
gpc->valid = false;
gpc->active = true;
spin_lock(&kvm->gpc_lock);
list_add(&gpc->list, &kvm->gpc_list);
spin_unlock(&kvm->gpc_lock);
/*
* Activate the cache after adding it to the list, a concurrent
* refresh must not establish a mapping until the cache is
* reachable by mmu_notifier events.
*/
write_lock_irq(&gpc->lock);
gpc->active = true;
write_unlock_irq(&gpc->lock);
}
return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);
EXPORT_SYMBOL_GPL(kvm_gpc_activate);
void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
if (gpc->active) {
/*
* Deactivate the cache before removing it from the list, KVM
* must stall mmu_notifier events until all users go away, i.e.
* until gpc->lock is dropped and refresh is guaranteed to fail.
*/
write_lock_irq(&gpc->lock);
gpc->active = false;
write_unlock_irq(&gpc->lock);
spin_lock(&kvm->gpc_lock);
list_del(&gpc->list);
spin_unlock(&kvm->gpc_lock);
kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
gpc->active = false;
}
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy);
EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);