x86:

- fix lock initialization race in gfn-to-pfn cache (+selftests)
- fix two refcounting errors
- emulator fixes
- mask off reserved bits in CPUID
- fix bug with disabling SGX

RISC-V:

- update MAINTAINERS
-----BEGIN PGP SIGNATURE-----
iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmNcYawUHHBib256aW5p
QHJlZGhhdC5jb20ACgkQv/vSX3jHroPMzwf/Uh1lO2Op6IJ3/no2gPfShc9bdqgM
LiCkzfJp9PQAuDl/hs44CiQHUlPEfeIsI/ns0euNj37TlnB3zKmm46mtiWhEefIH
rwcm/ngKgw3283pZEf8FeMTDfNexOaBg2ZNoODR7JQsU50tbToY4TNE2nNRgbdL5
SNmzOwox1rZIQHxEa2r/k2B/HdRbeCFUU82EjwFqaNzH1yhzBXMcokdSCmGCBMsE
3xfCzQ7uMkXw/rlkkG0be65+5dTNmhfiKQYGAQe4s7PycVPMD79D2EhCfbpvbK7t
EmgOXStmvtW6+ukqPATHbRVCDwW0VmiQv5IWOGbLB1Qdy5/REynJ5ObC8g==
=Hvro
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "x86:

   - fix lock initialization race in gfn-to-pfn cache (+selftests)
   - fix two refcounting errors
   - emulator fixes
   - mask off reserved bits in CPUID
   - fix bug with disabling SGX

  RISC-V:

   - update MAINTAINERS"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/xen: Fix eventfd error handling in kvm_xen_eventfd_assign()
  KVM: x86: smm: number of GPRs in the SMRAM image depends on the image format
  KVM: x86: emulator: update the emulation mode after CR0 write
  KVM: x86: emulator: update the emulation mode after rsm
  KVM: x86: emulator: introduce emulator_recalc_and_set_mode
  KVM: x86: emulator: em_sysexit should update ctxt->mode
  KVM: selftests: Mark "guest_saw_irq" as volatile in xen_shinfo_test
  KVM: selftests: Add tests in xen_shinfo_test to detect lock races
  KVM: Reject attempts to consume or refresh inactive gfn_to_pfn_cache
  KVM: Initialize gfn_to_pfn_cache locks in dedicated helper
  KVM: VMX: fully disable SGX if SECONDARY_EXEC_ENCLS_EXITING unavailable
  KVM: x86: Exempt pending triple fault from event injection sanity check
  MAINTAINERS: git://github -> https://github.com for kvm-riscv
  KVM: debugfs: Return retval of simple_attr_open() if it fails
  KVM: x86: Reduce refcount if single_open() fails in kvm_mmu_rmaps_stat_open()
  KVM: x86: Mask off reserved bits in CPUID.8000001FH
  KVM: x86: Mask off reserved bits in CPUID.8000001AH
  KVM: x86: Mask off reserved bits in CPUID.80000008H
  KVM: x86: Mask off reserved bits in CPUID.80000006H
  KVM: x86: Mask off reserved bits in CPUID.80000001H
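Several of the CPUID fixes in this pull simply clear bits that are reserved in the 0x8000xxxx leaves before the leaf is exposed to the guest, using GENMASK() as in the cpuid.c hunks below (for example entry->ebx &= ~GENMASK(27, 16) for leaf 0x80000001). The standalone C snippet below is only an editorial illustration of that masking pattern, not kernel code: GENMASK is re-defined locally (equivalent in effect to the kernel macro for unsigned long) so it builds in ordinary userspace, and the sample register value is made up.

/*
 * Illustration of reserved-bit masking with a GENMASK-style macro.
 * Not kernel code; the input value is invented for the example.
 */
#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
/* Bits l..h set, matching the effect of the kernel's GENMASK(h, l). */
#define GENMASK(h, l)	((~0UL << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

int main(void)
{
	/* Pretend this is what the host reports in CPUID.80000001H:EBX. */
	unsigned long ebx = 0x12345678;

	/* The diff below clears bits 27:16 of that leaf for the guest. */
	unsigned long masked = ebx & ~GENMASK(27, 16);

	printf("mask      = 0x%08lx\n", GENMASK(27, 16));	/* 0x0fff0000 */
	printf("host ebx  = 0x%08lx\n", ebx);
	printf("guest ebx = 0x%08lx\n", masked);		/* 0x10005678 */
	return 0;
}

Built with any C compiler, this prints the mask and the value with the reserved bits dropped, which is all the CPUID hunks below do for each affected leaf.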
commit f526d6a822
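The gfn-to-pfn cache fix ("KVM: Initialize gfn_to_pfn_cache locks in dedicated helper") moves lock initialization out of activation so that a cache's rwlock and mutex are set up exactly once, and the active flag is then only flipped while the write lock is held (see the virt/kvm/pfncache.c hunks at the end of the diff). The toy userspace program below is an illustrative analogue of that ordering, not KVM code: names such as cache_init/cache_activate are invented for the example, and pthread rwlocks stand in for the kernel primitives.

/*
 * Toy analogue (not KVM code) of the kvm_gpc_init()/kvm_gpc_activate()/
 * kvm_gpc_deactivate() ordering in the diff: locks are initialized once,
 * and "active" is only changed under the write lock, so readers either
 * see a fully published cache or refuse to use it.
 * Compile with: cc -pthread toy_gpc.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cache {
	pthread_rwlock_t lock;
	bool active;
	int value;
};

/* One-time setup, analogous to kvm_gpc_init(): locks only, no contents. */
static void cache_init(struct cache *c)
{
	pthread_rwlock_init(&c->lock, NULL);
	c->active = false;
}

/* Analogous to kvm_gpc_activate(): publish state, then mark active under the lock. */
static void cache_activate(struct cache *c, int value)
{
	pthread_rwlock_wrlock(&c->lock);
	c->value = value;
	c->active = true;
	pthread_rwlock_unlock(&c->lock);
}

/* Analogous to kvm_gpc_deactivate(): clear active under the lock before teardown. */
static void cache_deactivate(struct cache *c)
{
	pthread_rwlock_wrlock(&c->lock);
	c->active = false;
	pthread_rwlock_unlock(&c->lock);
}

/* Analogous to consumers checking gpc->active before using the mapping. */
static bool cache_read(struct cache *c, int *out)
{
	bool ok = false;

	pthread_rwlock_rdlock(&c->lock);
	if (c->active) {
		*out = c->value;
		ok = true;
	}
	pthread_rwlock_unlock(&c->lock);
	return ok;
}

int main(void)
{
	struct cache c;
	int v = 0;

	cache_init(&c);
	printf("before activate:  %s\n", cache_read(&c, &v) ? "valid" : "inactive");
	cache_activate(&c, 42);
	printf("after activate:   %s (%d)\n", cache_read(&c, &v) ? "valid" : "inactive", v);
	cache_deactivate(&c);
	printf("after deactivate: %s\n", cache_read(&c, &v) ? "valid" : "inactive");
	return 0;
}

The point of the ordering is the same as in the kernel comments quoted in the diff: a concurrent refresh must not establish a mapping before the cache is reachable, and teardown must make further use fail before the cache is unlinked.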
MAINTAINERS
@@ -11248,7 +11248,7 @@ L:	kvm@vger.kernel.org
 L:	kvm-riscv@lists.infradead.org
 L:	linux-riscv@lists.infradead.org
 S:	Maintained
-T:	git git://github.com/kvm-riscv/linux.git
+T:	git https://github.com/kvm-riscv/linux.git
 F:	arch/riscv/include/asm/kvm*
 F:	arch/riscv/include/uapi/asm/kvm*
 F:	arch/riscv/kvm/

arch/x86/kvm/cpuid.c
@@ -1133,11 +1133,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->eax = max(entry->eax, 0x80000021);
 		break;
 	case 0x80000001:
+		entry->ebx &= ~GENMASK(27, 16);
 		cpuid_entry_override(entry, CPUID_8000_0001_EDX);
 		cpuid_entry_override(entry, CPUID_8000_0001_ECX);
 		break;
 	case 0x80000006:
-		/* L2 cache and TLB: pass through host info. */
+		/* Drop reserved bits, pass host L2 cache and TLB info. */
+		entry->edx &= ~GENMASK(17, 16);
 		break;
 	case 0x80000007: /* Advanced power management */
 		/* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -1167,6 +1169,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 			g_phys_as = phys_as;
 
 		entry->eax = g_phys_as | (virt_as << 8);
+		entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
 		entry->edx = 0;
 		cpuid_entry_override(entry, CPUID_8000_0008_EBX);
 		break;
@@ -1186,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->ecx = entry->edx = 0;
 		break;
 	case 0x8000001a:
+		entry->eax &= GENMASK(2, 0);
+		entry->ebx = entry->ecx = entry->edx = 0;
+		break;
 	case 0x8000001e:
 		break;
 	case 0x8000001F:
@@ -1193,7 +1199,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
 		} else {
 			cpuid_entry_override(entry, CPUID_8000_001F_EAX);
-
+			/* Clear NumVMPL since KVM does not support VMPL. */
+			entry->ebx &= ~GENMASK(31, 12);
 			/*
 			 * Enumerate '0' for "PA bits reduction", the adjusted
 			 * MAXPHYADDR is enumerated directly (see 0x80000008).

arch/x86/kvm/debugfs.c
@@ -158,11 +158,16 @@ out:
 static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
 {
 	struct kvm *kvm = inode->i_private;
+	int r;
 
 	if (!kvm_get_kvm_safe(kvm))
 		return -ENOENT;
 
-	return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+	r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+	if (r < 0)
+		kvm_put_kvm(kvm);
+
+	return r;
 }
 
 static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)

arch/x86/kvm/emulate.c
@@ -791,8 +791,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
 			     ctxt->mode, linear);
 }
 
-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
-			     enum x86emul_mode mode)
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
 {
 	ulong linear;
 	int rc;
@@ -802,41 +801,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
 
 	if (ctxt->op_bytes != sizeof(unsigned long))
 		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
-	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+	rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
 	if (rc == X86EMUL_CONTINUE)
 		ctxt->_eip = addr.ea;
 	return rc;
 }
 
-static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
 {
-	return assign_eip(ctxt, dst, ctxt->mode);
+	u64 efer;
+	struct desc_struct cs;
+	u16 selector;
+	u32 base3;
+
+	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+	if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
+		/* Real mode. cpu must not have long mode active */
+		if (efer & EFER_LMA)
+			return X86EMUL_UNHANDLEABLE;
+		ctxt->mode = X86EMUL_MODE_REAL;
+		return X86EMUL_CONTINUE;
+	}
+
+	if (ctxt->eflags & X86_EFLAGS_VM) {
+		/* Protected/VM86 mode. cpu must not have long mode active */
+		if (efer & EFER_LMA)
+			return X86EMUL_UNHANDLEABLE;
+		ctxt->mode = X86EMUL_MODE_VM86;
+		return X86EMUL_CONTINUE;
+	}
+
+	if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
+		return X86EMUL_UNHANDLEABLE;
+
+	if (efer & EFER_LMA) {
+		if (cs.l) {
+			/* Proper long mode */
+			ctxt->mode = X86EMUL_MODE_PROT64;
+		} else if (cs.d) {
+			/* 32 bit compatibility mode*/
+			ctxt->mode = X86EMUL_MODE_PROT32;
+		} else {
+			ctxt->mode = X86EMUL_MODE_PROT16;
+		}
+	} else {
+		/* Legacy 32 bit / 16 bit mode */
+		ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+	}
+
+	return X86EMUL_CONTINUE;
 }
 
-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
-			  const struct desc_struct *cs_desc)
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
 {
-	enum x86emul_mode mode = ctxt->mode;
-	int rc;
-
-#ifdef CONFIG_X86_64
-	if (ctxt->mode >= X86EMUL_MODE_PROT16) {
-		if (cs_desc->l) {
-			u64 efer = 0;
-
-			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
-			if (efer & EFER_LMA)
-				mode = X86EMUL_MODE_PROT64;
-		} else
-			mode = X86EMUL_MODE_PROT32; /* temporary value */
-	}
-#endif
-	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
-		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-	rc = assign_eip(ctxt, dst, mode);
-	if (rc == X86EMUL_CONTINUE)
-		ctxt->mode = mode;
-	return rc;
+	return assign_eip(ctxt, dst);
+}
+
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	int rc = emulator_recalc_and_set_mode(ctxt);
+
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip(ctxt, dst);
 }
 
 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -2172,7 +2201,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+	rc = assign_eip_far(ctxt, ctxt->src.val);
 	/* Error handling is not implemented. */
 	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_UNHANDLEABLE;
@@ -2250,7 +2279,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	rc = assign_eip_far(ctxt, eip, &new_desc);
+	rc = assign_eip_far(ctxt, eip);
 	/* Error handling is not implemented. */
 	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_UNHANDLEABLE;
@@ -2432,7 +2461,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
 	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
 	ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
 
-	for (i = 0; i < NR_EMULATOR_GPRS; i++)
+	for (i = 0; i < 8; i++)
 		*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
 
 	val = GET_SMSTATE(u32, smstate, 0x7fcc);
@@ -2489,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
 	u16 selector;
 	int i, r;
 
-	for (i = 0; i < NR_EMULATOR_GPRS; i++)
+	for (i = 0; i < 16; i++)
 		*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
 
 	ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
@@ -2633,7 +2662,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 	 * those side effects need to be explicitly handled for both success
 	 * and shutdown.
 	 */
-	return X86EMUL_CONTINUE;
+	return emulator_recalc_and_set_mode(ctxt);
 
 emulate_shutdown:
 	ctxt->ops->triple_fault(ctxt);
@@ -2876,6 +2905,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
 	ctxt->_eip = rdx;
+	ctxt->mode = usermode;
 	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
 
 	return X86EMUL_CONTINUE;
@@ -3469,7 +3499,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+	rc = assign_eip_far(ctxt, ctxt->src.val);
 	if (rc != X86EMUL_CONTINUE)
 		goto fail;
 
@@ -3611,11 +3641,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
 
 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
 {
-	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+	int cr_num = ctxt->modrm_reg;
+	int r;
+
+	if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
 		return emulate_gp(ctxt, 0);
 
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
+
+	if (cr_num == 0) {
+		/*
+		 * CR0 write might have updated CR0.PE and/or CR0.PG
+		 * which can affect the cpu's execution mode.
+		 */
+		r = emulator_recalc_and_set_mode(ctxt);
+		if (r != X86EMUL_CONTINUE)
+			return r;
+	}
+
 	return X86EMUL_CONTINUE;
 }
 

arch/x86/kvm/vmx/vmx.c
@@ -8263,6 +8263,11 @@ static __init int hardware_setup(void)
 	if (!cpu_has_virtual_nmis())
 		enable_vnmi = 0;
 
+#ifdef CONFIG_X86_SGX_KVM
+	if (!cpu_has_vmx_encls_vmexit())
+		enable_sgx = false;
+#endif
+
 	/*
 	 * set_apic_access_page_addr() is used to reload apic access
 	 * page upon invalidation. No need to do anything if not

arch/x86/kvm/x86.c
@@ -2315,11 +2315,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
 
 	/* we verify if the enable bit is set... */
 	if (system_time & 1) {
-		kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
+		kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
				 KVM_HOST_USES_PFN, system_time & ~1ULL,
				 sizeof(struct pvclock_vcpu_time_info));
 	} else {
-		kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+		kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
 	}
 
 	return;
@@ -3388,7 +3388,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
 
 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
-	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
 	vcpu->arch.time = 0;
 }
 
@@ -10044,7 +10044,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
 	    kvm_x86_ops.nested_ops->has_events(vcpu))
 		*req_immediate_exit = true;
 
-	WARN_ON(kvm_is_exception_pending(vcpu));
+	/*
+	 * KVM must never queue a new exception while injecting an event; KVM
+	 * is done emulating and should only propagate the to-be-injected event
+	 * to the VMCS/VMCB. Queueing a new exception can put the vCPU into an
+	 * infinite loop as KVM will bail from VM-Enter to inject the pending
+	 * exception and start the cycle all over.
+	 *
+	 * Exempt triple faults as they have special handling and won't put the
+	 * vCPU into an infinite loop. Triple fault can be queued when running
+	 * VMX without unrestricted guest, as that requires KVM to emulate Real
+	 * Mode events (see kvm_inject_realmode_interrupt()).
+	 */
+	WARN_ON_ONCE(vcpu->arch.exception.pending ||
+		     vcpu->arch.exception_vmexit.pending);
 	return 0;
 
 out:
@@ -11816,6 +11829,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs_avail = ~0;
 	vcpu->arch.regs_dirty = ~0;
 
+	kvm_gpc_init(&vcpu->arch.pv_time);
+
 	if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	else

arch/x86/kvm/xen.c
@@ -42,13 +42,13 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 	int idx = srcu_read_lock(&kvm->srcu);
 
 	if (gfn == GPA_INVALID) {
-		kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
+		kvm_gpc_deactivate(kvm, gpc);
 		goto out;
 	}
 
 	do {
-		ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
-						gpa, PAGE_SIZE);
+		ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
+				       PAGE_SIZE);
 		if (ret)
 			goto out;
 
@@ -554,15 +554,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			     offsetof(struct compat_vcpu_info, time));
 
 		if (data->u.gpa == GPA_INVALID) {
-			kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+			kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
 			r = 0;
 			break;
 		}
 
-		r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
-					      &vcpu->arch.xen.vcpu_info_cache,
-					      NULL, KVM_HOST_USES_PFN, data->u.gpa,
+		r = kvm_gpc_activate(vcpu->kvm,
+				     &vcpu->arch.xen.vcpu_info_cache, NULL,
+				     KVM_HOST_USES_PFN, data->u.gpa,
 				     sizeof(struct vcpu_info));
 		if (!r)
 			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
@@ -570,16 +570,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 
 	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
 		if (data->u.gpa == GPA_INVALID) {
-			kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+			kvm_gpc_deactivate(vcpu->kvm,
 					   &vcpu->arch.xen.vcpu_time_info_cache);
 			r = 0;
 			break;
 		}
 
-		r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
+		r = kvm_gpc_activate(vcpu->kvm,
 				     &vcpu->arch.xen.vcpu_time_info_cache,
 				     NULL, KVM_HOST_USES_PFN, data->u.gpa,
 				     sizeof(struct pvclock_vcpu_time_info));
 		if (!r)
 			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 		break;
@@ -590,16 +590,15 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			break;
 		}
 		if (data->u.gpa == GPA_INVALID) {
-			kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+			kvm_gpc_deactivate(vcpu->kvm,
 					   &vcpu->arch.xen.runstate_cache);
 			r = 0;
 			break;
 		}
 
-		r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
-					      &vcpu->arch.xen.runstate_cache,
-					      NULL, KVM_HOST_USES_PFN, data->u.gpa,
-					      sizeof(struct vcpu_runstate_info));
+		r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+				     NULL, KVM_HOST_USES_PFN, data->u.gpa,
+				     sizeof(struct vcpu_runstate_info));
 		break;
 
 	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
@@ -1667,18 +1666,18 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
 	case EVTCHNSTAT_ipi:
 		/* IPI must map back to the same port# */
 		if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
-			goto out; /* -EINVAL */
+			goto out_noeventfd; /* -EINVAL */
 		break;
 
 	case EVTCHNSTAT_interdomain:
 		if (data->u.evtchn.deliver.port.port) {
 			if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
-				goto out; /* -EINVAL */
+				goto out_noeventfd; /* -EINVAL */
 		} else {
 			eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
 			if (IS_ERR(eventfd)) {
 				ret = PTR_ERR(eventfd);
-				goto out;
+				goto out_noeventfd;
 			}
 		}
 		break;
@@ -1718,6 +1717,7 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
 out:
 	if (eventfd)
 		eventfd_ctx_put(eventfd);
+out_noeventfd:
 	kfree(evtchnfd);
 	return ret;
 }
@@ -1816,7 +1816,12 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
 	vcpu->arch.xen.poll_evtchn = 0;
 
 	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
+
+	kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
+	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
 }
 
 void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -1824,18 +1829,17 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 	if (kvm_xen_timer_enabled(vcpu))
 		kvm_xen_stop_timer(vcpu);
 
-	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
-				     &vcpu->arch.xen.runstate_cache);
-	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
-				     &vcpu->arch.xen.vcpu_info_cache);
-	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
-				     &vcpu->arch.xen.vcpu_time_info_cache);
+	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
 
 	del_timer_sync(&vcpu->arch.xen.poll_timer);
 }
 
 void kvm_xen_init_vm(struct kvm *kvm)
 {
 	idr_init(&kvm->arch.xen.evtchn_ports);
+	kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
 }
 
 void kvm_xen_destroy_vm(struct kvm *kvm)
@@ -1843,7 +1847,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
 	struct evtchnfd *evtchnfd;
 	int i;
 
-	kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
+	kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);
 
 	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
 		if (!evtchnfd->deliver.port.port)

include/linux/kvm_host.h
@@ -1240,8 +1240,18 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 /**
- * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a
- *                             given guest physical address.
+ * kvm_gpc_init - initialize gfn_to_pfn_cache.
+ *
+ * @gpc:	   struct gfn_to_pfn_cache object.
+ *
+ * This sets up a gfn_to_pfn_cache by initializing locks. Note, the cache must
+ * be zero-allocated (or zeroed by the caller before init).
+ */
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
+
+/**
+ * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
+ *		      physical address.
 *
 * @kvm:	   pointer to kvm instance.
 * @gpc:	   struct gfn_to_pfn_cache object.
@@ -1265,9 +1275,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
 * accessing the target page.
 */
-int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
		     struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
		     gpa_t gpa, unsigned long len);
 
 /**
 * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
@@ -1324,7 +1334,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
 
 /**
- * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache.
+ * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
 *
 * @kvm:	   pointer to kvm instance.
 * @gpc:	   struct gfn_to_pfn_cache object.
@@ -1332,7 +1342,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
 * This removes a cache from the @kvm's list to be processed on MMU notifier
 * invocation.
 */
-void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
 
 void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);

tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -15,9 +15,13 @@
 #include <time.h>
 #include <sched.h>
 #include <signal.h>
+#include <pthread.h>
 
 #include <sys/eventfd.h>
 
+/* Defined in include/linux/kvm_types.h */
+#define GPA_INVALID		(~(ulong)0)
+
 #define SHINFO_REGION_GVA	0xc0000000ULL
 #define SHINFO_REGION_GPA	0xc0000000ULL
 #define SHINFO_REGION_SLOT	10
@@ -44,6 +48,8 @@
 
 #define MIN_STEAL_TIME		50000
 
+#define SHINFO_RACE_TIMEOUT	2	/* seconds */
+
 #define __HYPERVISOR_set_timer_op	15
 #define __HYPERVISOR_sched_op		29
 #define __HYPERVISOR_event_channel_op	32
@@ -126,7 +132,7 @@ struct {
 	struct kvm_irq_routing_entry entries[2];
 } irq_routes;
 
-bool guest_saw_irq;
+static volatile bool guest_saw_irq;
 
 static void evtchn_handler(struct ex_regs *regs)
 {
@@ -148,6 +154,7 @@ static void guest_wait_for_irq(void)
 static void guest_code(void)
 {
 	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+	int i;
 
 	__asm__ __volatile__(
 		"sti\n"
@@ -325,6 +332,49 @@ static void guest_code(void)
 	guest_wait_for_irq();
 
 	GUEST_SYNC(21);
+	/* Racing host ioctls */
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(22);
+	/* Racing vmcall against host ioctl */
+
+	ports[0] = 0;
+
+	p = (struct sched_poll) {
+		.ports = ports,
+		.nr_ports = 1,
+		.timeout = 0
+	};
+
+wait_for_timer:
+	/*
+	 * Poll for a timer wake event while the worker thread is mucking with
+	 * the shared info. KVM XEN drops timer IRQs if the shared info is
+	 * invalid when the timer expires. Arbitrarily poll 100 times before
+	 * giving up and asking the VMM to re-arm the timer. 100 polls should
+	 * consume enough time to beat on KVM without taking too long if the
+	 * timer IRQ is dropped due to an invalid event channel.
+	 */
+	for (i = 0; i < 100 && !guest_saw_irq; i++)
+		asm volatile("vmcall"
+			     : "=a" (rax)
+			     : "a" (__HYPERVISOR_sched_op),
+			       "D" (SCHEDOP_poll),
+			       "S" (&p)
+			     : "memory");
+
+	/*
+	 * Re-send the timer IRQ if it was (likely) dropped due to the timer
+	 * expiring while the event channel was invalid.
+	 */
+	if (!guest_saw_irq) {
+		GUEST_SYNC(23);
+		goto wait_for_timer;
+	}
+	guest_saw_irq = false;
+
+	GUEST_SYNC(24);
 }
 
 static int cmp_timespec(struct timespec *a, struct timespec *b)
@@ -352,11 +402,36 @@ static void handle_alrm(int sig)
 	TEST_FAIL("IRQ delivery timed out");
 }
 
+static void *juggle_shinfo_state(void *arg)
+{
+	struct kvm_vm *vm = (struct kvm_vm *)arg;
+
+	struct kvm_xen_hvm_attr cache_init = {
+		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+	};
+
+	struct kvm_xen_hvm_attr cache_destroy = {
+		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+		.u.shared_info.gfn = GPA_INVALID
+	};
+
+	for (;;) {
+		__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_init);
+		__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_destroy);
+		pthread_testcancel();
+	};
+
+	return NULL;
+}
+
 int main(int argc, char *argv[])
 {
 	struct timespec min_ts, max_ts, vm_ts;
 	struct kvm_vm *vm;
+	pthread_t thread;
 	bool verbose;
+	int ret;
 
 	verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
			       !strncmp(argv[1], "--verbose", 10));
@@ -785,6 +860,71 @@ int main(int argc, char *argv[])
 		case 21:
 			TEST_ASSERT(!evtchn_irq_expected,
 				    "Expected event channel IRQ but it didn't happen");
+			alarm(0);
+
+			if (verbose)
+				printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+			ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+			TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+			struct kvm_irq_routing_xen_evtchn uxe = {
+				.port = 1,
+				.vcpu = vcpu->id,
+				.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+			};
+
+			evtchn_irq_expected = true;
+			for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+				__vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+			break;
+
+		case 22:
+			TEST_ASSERT(!evtchn_irq_expected,
+				    "Expected event channel IRQ but it didn't happen");
+
+			if (verbose)
+				printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+			shinfo->evtchn_pending[0] = 1;
+
+			evtchn_irq_expected = true;
+			tmr.u.timer.expires_ns = rs->state_entry_time +
+						 SHINFO_RACE_TIMEOUT * 1000000000ULL;
+			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+			break;
+
+		case 23:
+			/*
+			 * Optional and possibly repeated sync point.
+			 * Injecting the timer IRQ may fail if the
+			 * shinfo is invalid when the timer expires.
+			 * If the timer has expired but the IRQ hasn't
+			 * been delivered, rearm the timer and retry.
+			 */
+			vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+			/* Resume the guest if the timer is still pending. */
+			if (tmr.u.timer.expires_ns)
+				break;
+
+			/* All done if the IRQ was delivered. */
+			if (!evtchn_irq_expected)
+				break;
+
+			tmr.u.timer.expires_ns = rs->state_entry_time +
						 SHINFO_RACE_TIMEOUT * 1000000000ULL;
+			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+			break;
+		case 24:
+			TEST_ASSERT(!evtchn_irq_expected,
+				    "Expected event channel IRQ but it didn't happen");
+
+			ret = pthread_cancel(thread);
+			TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+			ret = pthread_join(thread, 0);
+			TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
 			goto done;
 
 		case 0x20:

virt/kvm/kvm_main.c
@@ -5409,6 +5409,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
			    int (*get)(void *, u64 *), int (*set)(void *, u64),
			    const char *fmt)
 {
+	int ret;
 	struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
					  inode->i_private;
 
@@ -5420,15 +5421,13 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
 	if (!kvm_get_kvm_safe(stat_data->kvm))
 		return -ENOENT;
 
-	if (simple_attr_open(inode, file, get,
+	ret = simple_attr_open(inode, file, get,
			     kvm_stats_debugfs_mode(stat_data->desc) & 0222
-			     ? set : NULL,
-			     fmt)) {
+			     ? set : NULL, fmt);
+	if (ret)
 		kvm_put_kvm(stat_data->kvm);
-		return -ENOMEM;
-	}
 
-	return 0;
+	return ret;
 }
 
 static int kvm_debugfs_release(struct inode *inode, struct file *file)

virt/kvm/pfncache.c
@@ -81,6 +81,9 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 {
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 
+	if (!gpc->active)
+		return false;
+
 	if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
 		return false;
 
@@ -240,10 +243,11 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 {
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 	unsigned long page_offset = gpa & ~PAGE_MASK;
-	kvm_pfn_t old_pfn, new_pfn;
+	bool unmap_old = false;
 	unsigned long old_uhva;
+	kvm_pfn_t old_pfn;
 	void *old_khva;
-	int ret = 0;
+	int ret;
 
 	/*
	 * If must fit within a single page. The 'len' argument is
@@ -261,6 +265,11 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 
 	write_lock_irq(&gpc->lock);
 
+	if (!gpc->active) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	old_pfn = gpc->pfn;
 	old_khva = gpc->khva - offset_in_page(gpc->khva);
 	old_uhva = gpc->uhva;
@@ -291,6 +300,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 		/* If the HVA→PFN mapping was already valid, don't unmap it. */
 		old_pfn = KVM_PFN_ERR_FAULT;
 		old_khva = NULL;
+		ret = 0;
 	}
 
 out:
@@ -305,14 +315,15 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 		gpc->khva = NULL;
 	}
 
-	/* Snapshot the new pfn before dropping the lock! */
-	new_pfn = gpc->pfn;
+	/* Detect a pfn change before dropping the lock! */
+	unmap_old = (old_pfn != gpc->pfn);
 
+out_unlock:
 	write_unlock_irq(&gpc->lock);
 
 	mutex_unlock(&gpc->refresh_lock);
 
-	if (old_pfn != new_pfn)
+	if (unmap_old)
 		gpc_unmap_khva(kvm, old_pfn, old_khva);
 
 	return ret;
@@ -346,42 +357,61 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 }
 EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
 
-int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-			      struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
-			      gpa_t gpa, unsigned long len)
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc)
+{
+	rwlock_init(&gpc->lock);
+	mutex_init(&gpc->refresh_lock);
+}
+EXPORT_SYMBOL_GPL(kvm_gpc_init);
+
+int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+		     struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
+		     gpa_t gpa, unsigned long len)
 {
 	WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
 
 	if (!gpc->active) {
-		rwlock_init(&gpc->lock);
-		mutex_init(&gpc->refresh_lock);
-
 		gpc->khva = NULL;
 		gpc->pfn = KVM_PFN_ERR_FAULT;
 		gpc->uhva = KVM_HVA_ERR_BAD;
 		gpc->vcpu = vcpu;
 		gpc->usage = usage;
 		gpc->valid = false;
-		gpc->active = true;
 
 		spin_lock(&kvm->gpc_lock);
 		list_add(&gpc->list, &kvm->gpc_list);
 		spin_unlock(&kvm->gpc_lock);
+
+		/*
+		 * Activate the cache after adding it to the list, a concurrent
+		 * refresh must not establish a mapping until the cache is
+		 * reachable by mmu_notifier events.
+		 */
+		write_lock_irq(&gpc->lock);
+		gpc->active = true;
+		write_unlock_irq(&gpc->lock);
 	}
 	return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
 }
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);
+EXPORT_SYMBOL_GPL(kvm_gpc_activate);
 
-void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 {
 	if (gpc->active) {
+		/*
+		 * Deactivate the cache before removing it from the list, KVM
+		 * must stall mmu_notifier events until all users go away, i.e.
+		 * until gpc->lock is dropped and refresh is guaranteed to fail.
+		 */
+		write_lock_irq(&gpc->lock);
+		gpc->active = false;
+		write_unlock_irq(&gpc->lock);
+
 		spin_lock(&kvm->gpc_lock);
 		list_del(&gpc->list);
 		spin_unlock(&kvm->gpc_lock);
 
 		kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
-		gpc->active = false;
 	}
 }
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_destroy);
+EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);