PPC KVM fix for 5.5
- Fix a bug where we try to do an ultracall on a system without an ultravisor.

Merge tag 'kvm-ppc-fixes-5.5-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into kvm-master

This commit is contained in commit d68321dec1.
				| @ -5,7 +5,7 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation | ||||
| ---------------------- | ||||
| 
 | ||||
| The kvm API is a set of ioctls that are issued to control various aspects | ||||
| of a virtual machine.  The ioctls belong to three classes: | ||||
| of a virtual machine.  The ioctls belong to the following classes: | ||||
| 
 | ||||
|  - System ioctls: These query and set global attributes which affect the | ||||
|    whole kvm subsystem.  In addition a system ioctl is used to create | ||||
| @ -3002,6 +3002,9 @@ can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and | ||||
| KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number | ||||
| indicating the number of supported registers. | ||||
| 
 | ||||
| For ppc, the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability indicates whether | ||||
| the single-step debug event (KVM_GUESTDBG_SINGLESTEP) is supported. | ||||
| 
 | ||||
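As an editorial illustration (not part of the patch), a VMM would typically probe
this capability on the /dev/kvm fd and then request single-stepping through the
standard KVM_SET_GUEST_DEBUG vcpu ioctl; kvm_fd and vcpu_fd below are assumed to
be already-open file descriptors:

    if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_GUEST_DEBUG_SSTEP) > 0) {
            struct kvm_guest_debug dbg = {
                    .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
            };

            /* Guest exits with KVM_EXIT_DEBUG after one instruction. */
            ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }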
| When debug events exit the main run loop with the reason | ||||
| KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run | ||||
| structure containing architecture specific debug information. | ||||
| @ -4146,6 +4149,24 @@ Valid values for 'action': | ||||
| #define KVM_PMU_EVENT_ALLOW 0 | ||||
| #define KVM_PMU_EVENT_DENY 1 | ||||
| 
 | ||||
| 4.121 KVM_PPC_SVM_OFF | ||||
| 
 | ||||
| Capability: basic | ||||
| Architectures: powerpc | ||||
| Type: vm ioctl | ||||
| Parameters: none | ||||
| Returns: 0 on successful completion, | ||||
| Errors: | ||||
|   EINVAL:    if ultravisor failed to terminate the secure guest | ||||
|   ENOMEM:    if hypervisor failed to allocate new radix page tables for guest | ||||
| 
 | ||||
| This ioctl is used to turn off the secure mode of the guest or transition | ||||
| the guest from secure mode to normal mode. This is invoked when the guest | ||||
| is reset. This has no effect if called for a normal guest. | ||||
| 
 | ||||
| This ioctl issues an ultravisor call to terminate the secure guest, | ||||
| unpins the VPA pages and releases all the device pages that are used by | ||||
| the hypervisor to track the secure pages. | ||||
| 
 | ||||
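As an editorial illustration (not part of the patch), since the ioctl takes no
parameters a VMM simply issues it on the VM file descriptor when resetting a
secure guest; vm_fd below is assumed to be the fd returned by KVM_CREATE_VM:

    if (ioctl(vm_fd, KVM_PPC_SVM_OFF, 0) < 0)
            perror("KVM_PPC_SVM_OFF");   /* EINVAL or ENOMEM, see above */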
| 5. The kvm_run structure | ||||
| ------------------------ | ||||
|  | ||||
| @ -3,9 +3,19 @@ XICS interrupt controller | ||||
| Device type supported: KVM_DEV_TYPE_XICS | ||||
| 
 | ||||
| Groups: | ||||
|   KVM_DEV_XICS_SOURCES | ||||
|   1. KVM_DEV_XICS_GRP_SOURCES | ||||
|   Attributes: One per interrupt source, indexed by the source number. | ||||
| 
 | ||||
|   2. KVM_DEV_XICS_GRP_CTRL | ||||
|   Attributes: | ||||
|     2.1 KVM_DEV_XICS_NR_SERVERS (write only) | ||||
|   The kvm_device_attr.addr points to a __u32 value which is the number of | ||||
|   interrupt server numbers (ie, highest possible vcpu id plus one). | ||||
|   Errors: | ||||
|     -EINVAL: Value greater than KVM_MAX_VCPU_ID. | ||||
|     -EFAULT: Invalid user pointer for attr->addr. | ||||
|     -EBUSY:  A vcpu is already connected to the device. | ||||
| 
 | ||||
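As an editorial sketch (not part of the patch), the new control is written once,
before any vcpu is connected to the device (otherwise -EBUSY, as listed above);
xics_fd is assumed to be the fd returned by KVM_CREATE_DEVICE for
KVM_DEV_TYPE_XICS, and max_vcpu_id the highest vcpu id the VMM will use:

    __u32 nr_servers = max_vcpu_id + 1;
    struct kvm_device_attr attr = {
            .group = KVM_DEV_XICS_GRP_CTRL,
            .attr  = KVM_DEV_XICS_NR_SERVERS,
            .addr  = (__u64)(unsigned long)&nr_servers,
    };

    ioctl(xics_fd, KVM_SET_DEVICE_ATTR, &attr);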
| This device emulates the XICS (eXternal Interrupt Controller | ||||
| Specification) defined in PAPR.  The XICS has a set of interrupt | ||||
| sources, each identified by a 20-bit source number, and a set of | ||||
| @ -38,7 +48,7 @@ least-significant end of the word: | ||||
| 
 | ||||
| Each source has 64 bits of state that can be read and written using | ||||
| the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the | ||||
| KVM_DEV_XICS_SOURCES attribute group, with the attribute number being | ||||
| KVM_DEV_XICS_GRP_SOURCES attribute group, with the attribute number being | ||||
| the interrupt source number.  The 64 bit state word has the following | ||||
| bitfields, starting from the least-significant end of the word: | ||||
| 
 | ||||
|  | ||||
| @ -78,6 +78,14 @@ the legacy interrupt mode, referred as XICS (POWER7/8). | ||||
|     migrating the VM. | ||||
|     Errors: none | ||||
| 
 | ||||
|     1.3 KVM_DEV_XIVE_NR_SERVERS (write only) | ||||
|     The kvm_device_attr.addr points to a __u32 value which is the number of | ||||
|     interrupt server numbers (ie, highest possible vcpu id plus one). | ||||
|     Errors: | ||||
|       -EINVAL: Value greater than KVM_MAX_VCPU_ID. | ||||
|       -EFAULT: Invalid user pointer for attr->addr. | ||||
|       -EBUSY:  A vCPU is already connected to the device. | ||||
| 
 | ||||
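    As an editorial note (not part of the patch), this control is used exactly
    like KVM_DEV_XICS_NR_SERVERS above, only with the XIVE group and the fd of
    a KVM_DEV_TYPE_XIVE device (xive_fd is an assumed name):

        struct kvm_device_attr attr = {
                .group = KVM_DEV_XIVE_GRP_CTRL,
                .attr  = KVM_DEV_XIVE_NR_SERVERS,
                .addr  = (__u64)(unsigned long)&nr_servers,
        };
        ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);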
|   2. KVM_DEV_XIVE_GRP_SOURCE (write only) | ||||
|   Initializes a new source in the XIVE device and mask it. | ||||
|   Attributes: | ||||
|  | ||||
| @ -451,6 +451,23 @@ config PPC_TRANSACTIONAL_MEM | ||||
| 	help | ||||
| 	  Support user-mode Transactional Memory on POWERPC. | ||||
| 
 | ||||
| config PPC_UV | ||||
| 	bool "Ultravisor support" | ||||
| 	depends on KVM_BOOK3S_HV_POSSIBLE | ||||
| 	select ZONE_DEVICE | ||||
| 	select DEV_PAGEMAP_OPS | ||||
| 	select DEVICE_PRIVATE | ||||
| 	select MEMORY_HOTPLUG | ||||
| 	select MEMORY_HOTREMOVE | ||||
| 	default n | ||||
| 	help | ||||
| 	  This option paravirtualizes the kernel to run on POWER platforms that | ||||
| 	  support the Protected Execution Facility (PEF). On such platforms, | ||||
| 	  the ultravisor firmware runs at a privilege level above the | ||||
| 	  hypervisor. | ||||
| 
 | ||||
| 	  If unsure, say "N". | ||||
| 
 | ||||
| config LD_HEAD_STUB_CATCH | ||||
| 	bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT | ||||
| 	depends on PPC64 | ||||
|  | ||||
| @ -342,6 +342,15 @@ | ||||
| #define H_TLB_INVALIDATE	0xF808 | ||||
| #define H_COPY_TOFROM_GUEST	0xF80C | ||||
| 
 | ||||
| /* Flags for H_SVM_PAGE_IN */ | ||||
| #define H_PAGE_IN_SHARED        0x1 | ||||
| 
 | ||||
| /* Platform-specific hcalls used by the Ultravisor */ | ||||
| #define H_SVM_PAGE_IN		0xEF00 | ||||
| #define H_SVM_PAGE_OUT		0xEF04 | ||||
| #define H_SVM_INIT_START	0xEF08 | ||||
| #define H_SVM_INIT_DONE		0xEF0C | ||||
| 
 | ||||
| /* Values for 2nd argument to H_SET_MODE */ | ||||
| #define H_SET_MODE_RESOURCE_SET_CIABR		1 | ||||
| #define H_SET_MODE_RESOURCE_SET_DAWR		2 | ||||
|  | ||||
							
								
								
									
New file: arch/powerpc/include/asm/kvm_book3s_uvmem.h (74 lines)
							| @ -0,0 +1,74 @@ | ||||
| /* SPDX-License-Identifier: GPL-2.0 */ | ||||
| #ifndef __ASM_KVM_BOOK3S_UVMEM_H__ | ||||
| #define __ASM_KVM_BOOK3S_UVMEM_H__ | ||||
| 
 | ||||
| #ifdef CONFIG_PPC_UV | ||||
| int kvmppc_uvmem_init(void); | ||||
| void kvmppc_uvmem_free(void); | ||||
| int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot); | ||||
| void kvmppc_uvmem_slot_free(struct kvm *kvm, | ||||
| 			    const struct kvm_memory_slot *slot); | ||||
| unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, | ||||
| 				   unsigned long gra, | ||||
| 				   unsigned long flags, | ||||
| 				   unsigned long page_shift); | ||||
| unsigned long kvmppc_h_svm_page_out(struct kvm *kvm, | ||||
| 				    unsigned long gra, | ||||
| 				    unsigned long flags, | ||||
| 				    unsigned long page_shift); | ||||
| unsigned long kvmppc_h_svm_init_start(struct kvm *kvm); | ||||
| unsigned long kvmppc_h_svm_init_done(struct kvm *kvm); | ||||
| int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn); | ||||
| void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, | ||||
| 			     struct kvm *kvm); | ||||
| #else | ||||
| static inline int kvmppc_uvmem_init(void) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static inline void kvmppc_uvmem_free(void) { } | ||||
| 
 | ||||
| static inline int | ||||
| kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) { } | ||||
| 
 | ||||
| static inline unsigned long | ||||
| kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra, | ||||
| 		     unsigned long flags, unsigned long page_shift) | ||||
| { | ||||
| 	return H_UNSUPPORTED; | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long | ||||
| kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra, | ||||
| 		      unsigned long flags, unsigned long page_shift) | ||||
| { | ||||
| 	return H_UNSUPPORTED; | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) | ||||
| { | ||||
| 	return H_UNSUPPORTED; | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) | ||||
| { | ||||
| 	return H_UNSUPPORTED; | ||||
| } | ||||
| 
 | ||||
| static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) | ||||
| { | ||||
| 	return -EFAULT; | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, | ||||
| 			struct kvm *kvm) { } | ||||
| #endif /* CONFIG_PPC_UV */ | ||||
| #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */ | ||||
| @ -275,6 +275,10 @@ struct kvm_hpt_info { | ||||
| 
 | ||||
| struct kvm_resize_hpt; | ||||
| 
 | ||||
| /* Flag values for kvm_arch.secure_guest */ | ||||
| #define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */ | ||||
| #define KVMPPC_SECURE_INIT_DONE  0x2 /* H_SVM_INIT_DONE completed */ | ||||
| 
 | ||||
| struct kvm_arch { | ||||
| 	unsigned int lpid; | ||||
| 	unsigned int smt_mode;		/* # vcpus per virtual core */ | ||||
| @ -330,6 +334,8 @@ struct kvm_arch { | ||||
| #endif | ||||
| 	struct kvmppc_ops *kvm_ops; | ||||
| #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | ||||
| 	struct mutex uvmem_lock; | ||||
| 	struct list_head uvmem_pfns; | ||||
| 	struct mutex mmu_setup_lock;	/* nests inside vcpu mutexes */ | ||||
| 	u64 l1_ptcr; | ||||
| 	int max_nested_lpid; | ||||
| @ -401,7 +407,6 @@ struct kvmppc_mmu { | ||||
| 	u32  (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum); | ||||
| 	int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, | ||||
| 		      struct kvmppc_pte *pte, bool data, bool iswrite); | ||||
| 	void (*reset_msr)(struct kvm_vcpu *vcpu); | ||||
| 	void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); | ||||
| 	int  (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); | ||||
| 	u64  (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); | ||||
|  | ||||
| @ -271,6 +271,7 @@ struct kvmppc_ops { | ||||
| 			   union kvmppc_one_reg *val); | ||||
| 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | ||||
| 	void (*vcpu_put)(struct kvm_vcpu *vcpu); | ||||
| 	void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); | ||||
| 	void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); | ||||
| 	int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); | ||||
| 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id); | ||||
| @ -321,6 +322,7 @@ struct kvmppc_ops { | ||||
| 			       int size); | ||||
| 	int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, | ||||
| 			      int size); | ||||
| 	int (*svm_off)(struct kvm *kvm); | ||||
| }; | ||||
| 
 | ||||
| extern struct kvmppc_ops *kvmppc_hv_ops; | ||||
|  | ||||
| @ -748,6 +748,18 @@ | ||||
| #define SPRN_USPRG7	0x107	/* SPRG7 userspace read */ | ||||
| #define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */ | ||||
| #define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */ | ||||
| 
 | ||||
| #ifdef CONFIG_PPC_BOOK3S | ||||
| /*
 | ||||
|  * Bits loaded from MSR upon interrupt. | ||||
|  * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are | ||||
|  * loaded from MSR. The exception is that SRESET and MCE do not always load | ||||
|  * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses | ||||
|  * it. | ||||
|  */ | ||||
| #define   SRR1_MSR_BITS		(~0x783f0000UL) | ||||
| #endif | ||||
| 
 | ||||
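/*
 * Editorial note (not part of the patch): interrupt delivery composes the
 * guest's new SRR1 from the interrupt-independent MSR bits plus the
 * vector-specific flags, roughly:
 *
 *	srr1 = (kvmppc_get_msr(vcpu) & SRR1_MSR_BITS) | srr1_flags;
 *
 * which is what replaces the previous open-coded ~0x783f0000ul masks.
 */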
| #define   SRR1_ISI_NOPT		0x40000000 /* ISI: Not found in hash */ | ||||
| #define   SRR1_ISI_N_OR_G	0x10000000 /* ISI: Access is no-exec or G */ | ||||
| #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */ | ||||
|  | ||||
| @ -26,8 +26,14 @@ | ||||
| #define UV_WRITE_PATE			0xF104 | ||||
| #define UV_RETURN			0xF11C | ||||
| #define UV_ESM				0xF110 | ||||
| #define UV_REGISTER_MEM_SLOT		0xF120 | ||||
| #define UV_UNREGISTER_MEM_SLOT		0xF124 | ||||
| #define UV_PAGE_IN			0xF128 | ||||
| #define UV_PAGE_OUT			0xF12C | ||||
| #define UV_SHARE_PAGE			0xF130 | ||||
| #define UV_UNSHARE_PAGE			0xF134 | ||||
| #define UV_UNSHARE_ALL_PAGES		0xF140 | ||||
| #define UV_PAGE_INVAL			0xF138 | ||||
| #define UV_SVM_TERMINATE		0xF13C | ||||
| 
 | ||||
| #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */ | ||||
|  | ||||
| @ -46,4 +46,40 @@ static inline int uv_unshare_all_pages(void) | ||||
| 	return ucall_norets(UV_UNSHARE_ALL_PAGES); | ||||
| } | ||||
| 
 | ||||
| static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags, | ||||
| 			     u64 page_shift) | ||||
| { | ||||
| 	return ucall_norets(UV_PAGE_IN, lpid, src_ra, dst_gpa, flags, | ||||
| 			    page_shift); | ||||
| } | ||||
| 
 | ||||
| static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags, | ||||
| 			      u64 page_shift) | ||||
| { | ||||
| 	return ucall_norets(UV_PAGE_OUT, lpid, dst_ra, src_gpa, flags, | ||||
| 			    page_shift); | ||||
| } | ||||
| 
 | ||||
| static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size, | ||||
| 				       u64 flags, u64 slotid) | ||||
| { | ||||
| 	return ucall_norets(UV_REGISTER_MEM_SLOT, lpid, start_gpa, | ||||
| 			    size, flags, slotid); | ||||
| } | ||||
| 
 | ||||
| static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid) | ||||
| { | ||||
| 	return ucall_norets(UV_UNREGISTER_MEM_SLOT, lpid, slotid); | ||||
| } | ||||
| 
 | ||||
| static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift) | ||||
| { | ||||
| 	return ucall_norets(UV_PAGE_INVAL, lpid, gpa, page_shift); | ||||
| } | ||||
| 
 | ||||
| static inline int uv_svm_terminate(u64 lpid) | ||||
| { | ||||
| 	return ucall_norets(UV_SVM_TERMINATE, lpid); | ||||
| } | ||||
| 
 | ||||
| #endif	/* _ASM_POWERPC_ULTRAVISOR_H */ | ||||
|  | ||||
| @ -667,6 +667,8 @@ struct kvm_ppc_cpu_char { | ||||
| 
 | ||||
| /* PPC64 eXternal Interrupt Controller Specification */ | ||||
| #define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */ | ||||
| #define KVM_DEV_XICS_GRP_CTRL		2 | ||||
| #define   KVM_DEV_XICS_NR_SERVERS	1 | ||||
| 
 | ||||
| /* Layout of 64-bit source attribute values */ | ||||
| #define  KVM_XICS_DESTINATION_SHIFT	0 | ||||
| @ -683,6 +685,7 @@ struct kvm_ppc_cpu_char { | ||||
| #define KVM_DEV_XIVE_GRP_CTRL		1 | ||||
| #define   KVM_DEV_XIVE_RESET		1 | ||||
| #define   KVM_DEV_XIVE_EQ_SYNC		2 | ||||
| #define   KVM_DEV_XIVE_NR_SERVERS	3 | ||||
| #define KVM_DEV_XIVE_GRP_SOURCE		2	/* 64-bit source identifier */ | ||||
| #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG	3	/* 64-bit source identifier */ | ||||
| #define KVM_DEV_XIVE_GRP_EQ_CONFIG	4	/* 64-bit EQ identifier */ | ||||
|  | ||||
| @ -71,6 +71,9 @@ kvm-hv-y += \ | ||||
| 	book3s_64_mmu_radix.o \
 | ||||
| 	book3s_hv_nested.o | ||||
| 
 | ||||
| kvm-hv-$(CONFIG_PPC_UV) += \
 | ||||
| 	book3s_hv_uvmem.o | ||||
| 
 | ||||
| kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 | ||||
| 	book3s_hv_tm.o | ||||
| 
 | ||||
|  | ||||
| @ -74,27 +74,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | ||||
| 	{ NULL } | ||||
| }; | ||||
| 
 | ||||
| void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { | ||||
| 		ulong pc = kvmppc_get_pc(vcpu); | ||||
| 		ulong lr = kvmppc_get_lr(vcpu); | ||||
| 		if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) | ||||
| 			kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); | ||||
| 		if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) | ||||
| 			kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); | ||||
| 		vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; | ||||
| 	} | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); | ||||
| 
 | ||||
| static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (!is_kvmppc_hv_enabled(vcpu->kvm)) | ||||
| 		return to_book3s(vcpu)->hior; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, | ||||
| 			unsigned long pending_now, unsigned long old_pending) | ||||
| { | ||||
| @ -134,11 +113,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) | ||||
| 
 | ||||
| void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) | ||||
| { | ||||
| 	kvmppc_unfixup_split_real(vcpu); | ||||
| 	kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); | ||||
| 	kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags); | ||||
| 	kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); | ||||
| 	vcpu->arch.mmu.reset_msr(vcpu); | ||||
| 	vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags); | ||||
| } | ||||
| 
 | ||||
| static int kvmppc_book3s_vec2irqprio(unsigned int vec) | ||||
|  | ||||
| @ -32,4 +32,7 @@ extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val); | ||||
| static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {} | ||||
| #endif | ||||
| 
 | ||||
| extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); | ||||
| extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); | ||||
| 
 | ||||
| #endif | ||||
|  | ||||
| @ -90,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, | ||||
| 	return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	kvmppc_set_msr(vcpu, 0); | ||||
| } | ||||
| 
 | ||||
| static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu, | ||||
| 				      u32 sre, gva_t eaddr, | ||||
| 				      bool primary) | ||||
| @ -406,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) | ||||
| 	mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin; | ||||
| 	mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin; | ||||
| 	mmu->xlate = kvmppc_mmu_book3s_32_xlate; | ||||
| 	mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr; | ||||
| 	mmu->tlbie = kvmppc_mmu_book3s_32_tlbie; | ||||
| 	mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid; | ||||
| 	mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp; | ||||
|  | ||||
| @ -24,20 +24,6 @@ | ||||
| #define dprintk(X...) do { } while(0) | ||||
| #endif | ||||
| 
 | ||||
| static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	unsigned long msr = vcpu->arch.intr_msr; | ||||
| 	unsigned long cur_msr = kvmppc_get_msr(vcpu); | ||||
| 
 | ||||
| 	/* If transactional, change to suspend mode on IRQ delivery */ | ||||
| 	if (MSR_TM_TRANSACTIONAL(cur_msr)) | ||||
| 		msr |= MSR_TS_S; | ||||
| 	else | ||||
| 		msr |= cur_msr & MSR_TS_MASK; | ||||
| 
 | ||||
| 	kvmppc_set_msr(vcpu, msr); | ||||
| } | ||||
| 
 | ||||
| static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( | ||||
| 				struct kvm_vcpu *vcpu, | ||||
| 				gva_t eaddr) | ||||
| @ -676,7 +662,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) | ||||
| 	mmu->slbie = kvmppc_mmu_book3s_64_slbie; | ||||
| 	mmu->slbia = kvmppc_mmu_book3s_64_slbia; | ||||
| 	mmu->xlate = kvmppc_mmu_book3s_64_xlate; | ||||
| 	mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr; | ||||
| 	mmu->tlbie = kvmppc_mmu_book3s_64_tlbie; | ||||
| 	mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid; | ||||
| 	mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp; | ||||
|  | ||||
| @ -275,18 +275,6 @@ int kvmppc_mmu_hv_init(void) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	unsigned long msr = vcpu->arch.intr_msr; | ||||
| 
 | ||||
| 	/* If transactional, change to suspend mode on IRQ delivery */ | ||||
| 	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) | ||||
| 		msr |= MSR_TS_S; | ||||
| 	else | ||||
| 		msr |= vcpu->arch.shregs.msr & MSR_TS_MASK; | ||||
| 	kvmppc_set_msr(vcpu, msr); | ||||
| } | ||||
| 
 | ||||
| static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, | ||||
| 				long pte_index, unsigned long pteh, | ||||
| 				unsigned long ptel, unsigned long *pte_idx_ret) | ||||
| @ -508,6 +496,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||||
| 	struct vm_area_struct *vma; | ||||
| 	unsigned long rcbits; | ||||
| 	long mmio_update; | ||||
| 	struct mm_struct *mm; | ||||
| 
 | ||||
| 	if (kvm_is_radix(kvm)) | ||||
| 		return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); | ||||
| @ -584,6 +573,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||||
| 	is_ci = false; | ||||
| 	pfn = 0; | ||||
| 	page = NULL; | ||||
| 	mm = current->mm; | ||||
| 	pte_size = PAGE_SIZE; | ||||
| 	writing = (dsisr & DSISR_ISSTORE) != 0; | ||||
| 	/* If writing != 0, then the HPTE must allow writing, if we get here */ | ||||
| @ -592,8 +582,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||||
| 	npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); | ||||
| 	if (npages < 1) { | ||||
| 		/* Check if it's an I/O mapping */ | ||||
| 		down_read(¤t->mm->mmap_sem); | ||||
| 		vma = find_vma(current->mm, hva); | ||||
| 		down_read(&mm->mmap_sem); | ||||
| 		vma = find_vma(mm, hva); | ||||
| 		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && | ||||
| 		    (vma->vm_flags & VM_PFNMAP)) { | ||||
| 			pfn = vma->vm_pgoff + | ||||
| @ -602,7 +592,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||||
| 			is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot)))); | ||||
| 			write_ok = vma->vm_flags & VM_WRITE; | ||||
| 		} | ||||
| 		up_read(¤t->mm->mmap_sem); | ||||
| 		up_read(&mm->mmap_sem); | ||||
| 		if (!pfn) | ||||
| 			goto out_put; | ||||
| 	} else { | ||||
| @ -621,8 +611,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||||
| 			 * hugepage split and collapse. | ||||
| 			 */ | ||||
| 			local_irq_save(flags); | ||||
| 			ptep = find_current_mm_pte(current->mm->pgd, | ||||
| 						   hva, NULL, NULL); | ||||
| 			ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL); | ||||
| 			if (ptep) { | ||||
| 				pte = kvmppc_read_update_linux_pte(ptep, 1); | ||||
| 				if (__pte_write(pte)) | ||||
| @ -2000,7 +1989,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) | ||||
| 	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); | ||||
| 	if (ret < 0) { | ||||
| 		kfree(ctx); | ||||
| 		kvm_put_kvm(kvm); | ||||
| 		kvm_put_kvm_no_destroy(kvm); | ||||
| 		return ret; | ||||
| 	} | ||||
| 
 | ||||
| @ -2161,7 +2150,6 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) | ||||
| 	vcpu->arch.slb_nr = 32;		/* POWER7/POWER8 */ | ||||
| 
 | ||||
| 	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; | ||||
| 	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; | ||||
| 
 | ||||
| 	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; | ||||
| } | ||||
|  | ||||
| @ -19,6 +19,8 @@ | ||||
| #include <asm/pgtable.h> | ||||
| #include <asm/pgalloc.h> | ||||
| #include <asm/pte-walk.h> | ||||
| #include <asm/ultravisor.h> | ||||
| #include <asm/kvm_book3s_uvmem.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Supported radix tree geometry. | ||||
| @ -915,6 +917,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||||
| 	if (!(dsisr & DSISR_PRTABLE_FAULT)) | ||||
| 		gpa |= ea & 0xfff; | ||||
| 
 | ||||
| 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) | ||||
| 		return kvmppc_send_page_to_uv(kvm, gfn); | ||||
| 
 | ||||
| 	/* Get the corresponding memslot */ | ||||
| 	memslot = gfn_to_memslot(kvm, gfn); | ||||
| 
 | ||||
| @ -972,6 +977,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||||
| 	unsigned long gpa = gfn << PAGE_SHIFT; | ||||
| 	unsigned int shift; | ||||
| 
 | ||||
| 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) { | ||||
| 		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT); | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); | ||||
| 	if (ptep && pte_present(*ptep)) | ||||
| 		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, | ||||
| @ -989,6 +999,9 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||||
| 	int ref = 0; | ||||
| 	unsigned long old, *rmapp; | ||||
| 
 | ||||
| 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) | ||||
| 		return ref; | ||||
| 
 | ||||
| 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); | ||||
| 	if (ptep && pte_present(*ptep) && pte_young(*ptep)) { | ||||
| 		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, | ||||
| @ -1013,6 +1026,9 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, | ||||
| 	unsigned int shift; | ||||
| 	int ref = 0; | ||||
| 
 | ||||
| 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) | ||||
| 		return ref; | ||||
| 
 | ||||
| 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); | ||||
| 	if (ptep && pte_present(*ptep) && pte_young(*ptep)) | ||||
| 		ref = 1; | ||||
| @ -1030,6 +1046,9 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, | ||||
| 	int ret = 0; | ||||
| 	unsigned long old, *rmapp; | ||||
| 
 | ||||
| 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); | ||||
| 	if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { | ||||
| 		ret = 1; | ||||
| @ -1082,6 +1101,12 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, | ||||
| 	unsigned long gpa; | ||||
| 	unsigned int shift; | ||||
| 
 | ||||
| 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START) | ||||
| 		kvmppc_uvmem_drop_pages(memslot, kvm); | ||||
| 
 | ||||
| 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) | ||||
| 		return; | ||||
| 
 | ||||
| 	gpa = memslot->base_gfn << PAGE_SHIFT; | ||||
| 	spin_lock(&kvm->mmu_lock); | ||||
| 	for (n = memslot->npages; n; --n) { | ||||
|  | ||||
| @ -317,7 +317,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | ||||
| 	if (ret >= 0) | ||||
| 		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); | ||||
| 	else | ||||
| 		kvm_put_kvm(kvm); | ||||
| 		kvm_put_kvm_no_destroy(kvm); | ||||
| 
 | ||||
| 	mutex_unlock(&kvm->lock); | ||||
| 
 | ||||
|  | ||||
| @ -72,6 +72,9 @@ | ||||
| #include <asm/xics.h> | ||||
| #include <asm/xive.h> | ||||
| #include <asm/hw_breakpoint.h> | ||||
| #include <asm/kvm_host.h> | ||||
| #include <asm/kvm_book3s_uvmem.h> | ||||
| #include <asm/ultravisor.h> | ||||
| 
 | ||||
| #include "book3s.h" | ||||
| 
 | ||||
| @ -133,7 +136,6 @@ static inline bool nesting_enabled(struct kvm *kvm) | ||||
| /* If set, the threads on each CPU core have to be in the same MMU mode */ | ||||
| static bool no_mixing_hpt_and_radix; | ||||
| 
 | ||||
| static void kvmppc_end_cede(struct kvm_vcpu *vcpu); | ||||
| static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); | ||||
| 
 | ||||
| /*
 | ||||
| @ -338,18 +340,6 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) | ||||
| 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * Check for illegal transactional state bit combination | ||||
| 	 * and if we find it, force the TS field to a safe state. | ||||
| 	 */ | ||||
| 	if ((msr & MSR_TS_MASK) == MSR_TS_MASK) | ||||
| 		msr &= ~MSR_TS_MASK; | ||||
| 	vcpu->arch.shregs.msr = msr; | ||||
| 	kvmppc_end_cede(vcpu); | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) | ||||
| { | ||||
| 	vcpu->arch.pvr = pvr; | ||||
| @ -792,6 +782,11 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, | ||||
| 		vcpu->arch.dawr  = value1; | ||||
| 		vcpu->arch.dawrx = value2; | ||||
| 		return H_SUCCESS; | ||||
| 	case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: | ||||
| 		/* KVM does not support mflags=2 (AIL=2) */ | ||||
| 		if (mflags != 0 && mflags != 3) | ||||
| 			return H_UNSUPPORTED_FLAG_START; | ||||
| 		return H_TOO_HARD; | ||||
| 	default: | ||||
| 		return H_TOO_HARD; | ||||
| 	} | ||||
| @ -1078,6 +1073,25 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | ||||
| 					 kvmppc_get_gpr(vcpu, 5), | ||||
| 					 kvmppc_get_gpr(vcpu, 6)); | ||||
| 		break; | ||||
| 	case H_SVM_PAGE_IN: | ||||
| 		ret = kvmppc_h_svm_page_in(vcpu->kvm, | ||||
| 					   kvmppc_get_gpr(vcpu, 4), | ||||
| 					   kvmppc_get_gpr(vcpu, 5), | ||||
| 					   kvmppc_get_gpr(vcpu, 6)); | ||||
| 		break; | ||||
| 	case H_SVM_PAGE_OUT: | ||||
| 		ret = kvmppc_h_svm_page_out(vcpu->kvm, | ||||
| 					    kvmppc_get_gpr(vcpu, 4), | ||||
| 					    kvmppc_get_gpr(vcpu, 5), | ||||
| 					    kvmppc_get_gpr(vcpu, 6)); | ||||
| 		break; | ||||
| 	case H_SVM_INIT_START: | ||||
| 		ret = kvmppc_h_svm_init_start(vcpu->kvm); | ||||
| 		break; | ||||
| 	case H_SVM_INIT_DONE: | ||||
| 		ret = kvmppc_h_svm_init_done(vcpu->kvm); | ||||
| 		break; | ||||
| 
 | ||||
| 	default: | ||||
| 		return RESUME_HOST; | ||||
| 	} | ||||
| @ -2454,15 +2468,6 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) | ||||
| 	vcpu->arch.timer_running = 1; | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_end_cede(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	vcpu->arch.ceded = 0; | ||||
| 	if (vcpu->arch.timer_running) { | ||||
| 		hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | ||||
| 		vcpu->arch.timer_running = 0; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| extern int __kvmppc_vcore_entry(void); | ||||
| 
 | ||||
| static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, | ||||
| @ -4511,6 +4516,29 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, | ||||
| 	if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) && | ||||
| 	    ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES)) | ||||
| 		kvmppc_radix_flush_memslot(kvm, old); | ||||
| 	/*
 | ||||
| 	 * If UV hasn't yet called H_SVM_INIT_START, don't register memslots. | ||||
| 	 */ | ||||
| 	if (!kvm->arch.secure_guest) | ||||
| 		return; | ||||
| 
 | ||||
| 	switch (change) { | ||||
| 	case KVM_MR_CREATE: | ||||
| 		if (kvmppc_uvmem_slot_init(kvm, new)) | ||||
| 			return; | ||||
| 		uv_register_mem_slot(kvm->arch.lpid, | ||||
| 				     new->base_gfn << PAGE_SHIFT, | ||||
| 				     new->npages * PAGE_SIZE, | ||||
| 				     0, new->id); | ||||
| 		break; | ||||
| 	case KVM_MR_DELETE: | ||||
| 		uv_unregister_mem_slot(kvm->arch.lpid, old->id); | ||||
| 		kvmppc_uvmem_slot_free(kvm, old); | ||||
| 		break; | ||||
| 	default: | ||||
| 		/* TODO: Handle KVM_MR_MOVE */ | ||||
| 		break; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -4784,6 +4812,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) | ||||
| 	char buf[32]; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	mutex_init(&kvm->arch.uvmem_lock); | ||||
| 	INIT_LIST_HEAD(&kvm->arch.uvmem_pfns); | ||||
| 	mutex_init(&kvm->arch.mmu_setup_lock); | ||||
| 
 | ||||
| 	/* Allocate the guest's logical partition ID */ | ||||
| @ -4953,8 +4983,11 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) | ||||
| 		if (nesting_enabled(kvm)) | ||||
| 			kvmhv_release_all_nested(kvm); | ||||
| 		kvm->arch.process_table = 0; | ||||
| 		if (kvm->arch.secure_guest) | ||||
| 			uv_svm_terminate(kvm->arch.lpid); | ||||
| 		kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); | ||||
| 	} | ||||
| 
 | ||||
| 	kvmppc_free_lpid(kvm->arch.lpid); | ||||
| 
 | ||||
| 	kvmppc_free_pimap(kvm); | ||||
| @ -5394,6 +5427,94 @@ static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, | ||||
| 	return rc; | ||||
| } | ||||
| 
 | ||||
| static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa) | ||||
| { | ||||
| 	unpin_vpa(kvm, vpa); | ||||
| 	vpa->gpa = 0; | ||||
| 	vpa->pinned_addr = NULL; | ||||
| 	vpa->dirty = false; | ||||
| 	vpa->update_pending = 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  *  IOCTL handler to turn off secure mode of guest | ||||
|  * | ||||
|  * - Release all device pages | ||||
|  * - Issue ucall to terminate the guest on the UV side | ||||
|  * - Unpin the VPA pages. | ||||
|  * - Reinit the partition scoped page tables | ||||
|  */ | ||||
| static int kvmhv_svm_off(struct kvm *kvm) | ||||
| { | ||||
| 	struct kvm_vcpu *vcpu; | ||||
| 	int mmu_was_ready; | ||||
| 	int srcu_idx; | ||||
| 	int ret = 0; | ||||
| 	int i; | ||||
| 
 | ||||
| 	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	mutex_lock(&kvm->arch.mmu_setup_lock); | ||||
| 	mmu_was_ready = kvm->arch.mmu_ready; | ||||
| 	if (kvm->arch.mmu_ready) { | ||||
| 		kvm->arch.mmu_ready = 0; | ||||
| 		/* order mmu_ready vs. vcpus_running */ | ||||
| 		smp_mb(); | ||||
| 		if (atomic_read(&kvm->arch.vcpus_running)) { | ||||
| 			kvm->arch.mmu_ready = 1; | ||||
| 			ret = -EBUSY; | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	srcu_idx = srcu_read_lock(&kvm->srcu); | ||||
| 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { | ||||
| 		struct kvm_memory_slot *memslot; | ||||
| 		struct kvm_memslots *slots = __kvm_memslots(kvm, i); | ||||
| 
 | ||||
| 		if (!slots) | ||||
| 			continue; | ||||
| 
 | ||||
| 		kvm_for_each_memslot(memslot, slots) { | ||||
| 			kvmppc_uvmem_drop_pages(memslot, kvm); | ||||
| 			uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); | ||||
| 		} | ||||
| 	} | ||||
| 	srcu_read_unlock(&kvm->srcu, srcu_idx); | ||||
| 
 | ||||
| 	ret = uv_svm_terminate(kvm->arch.lpid); | ||||
| 	if (ret != U_SUCCESS) { | ||||
| 		ret = -EINVAL; | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * When secure guest is reset, all the guest pages are sent | ||||
| 	 * to UV via UV_PAGE_IN before the non-boot vcpus get a | ||||
| 	 * chance to run and unpin their VPA pages. Unpinning of all | ||||
| 	 * VPA pages is done here explicitly so that VPA pages | ||||
| 	 * can be migrated to the secure side. | ||||
| 	 * | ||||
| 	 * This is required for the secure SMP guest to reboot | ||||
| 	 * correctly. | ||||
| 	 */ | ||||
| 	kvm_for_each_vcpu(i, vcpu, kvm) { | ||||
| 		spin_lock(&vcpu->arch.vpa_update_lock); | ||||
| 		unpin_vpa_reset(kvm, &vcpu->arch.dtl); | ||||
| 		unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow); | ||||
| 		unpin_vpa_reset(kvm, &vcpu->arch.vpa); | ||||
| 		spin_unlock(&vcpu->arch.vpa_update_lock); | ||||
| 	} | ||||
| 
 | ||||
| 	kvmppc_setup_partition_table(kvm); | ||||
| 	kvm->arch.secure_guest = 0; | ||||
| 	kvm->arch.mmu_ready = mmu_was_ready; | ||||
| out: | ||||
| 	mutex_unlock(&kvm->arch.mmu_setup_lock); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static struct kvmppc_ops kvm_ops_hv = { | ||||
| 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, | ||||
| 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, | ||||
| @ -5401,6 +5522,7 @@ static struct kvmppc_ops kvm_ops_hv = { | ||||
| 	.set_one_reg = kvmppc_set_one_reg_hv, | ||||
| 	.vcpu_load   = kvmppc_core_vcpu_load_hv, | ||||
| 	.vcpu_put    = kvmppc_core_vcpu_put_hv, | ||||
| 	.inject_interrupt = kvmppc_inject_interrupt_hv, | ||||
| 	.set_msr     = kvmppc_set_msr_hv, | ||||
| 	.vcpu_run    = kvmppc_vcpu_run_hv, | ||||
| 	.vcpu_create = kvmppc_core_vcpu_create_hv, | ||||
| @ -5436,6 +5558,7 @@ static struct kvmppc_ops kvm_ops_hv = { | ||||
| 	.enable_nested = kvmhv_enable_nested, | ||||
| 	.load_from_eaddr = kvmhv_load_from_eaddr, | ||||
| 	.store_to_eaddr = kvmhv_store_to_eaddr, | ||||
| 	.svm_off = kvmhv_svm_off, | ||||
| }; | ||||
| 
 | ||||
| static int kvm_init_subcore_bitmap(void) | ||||
| @ -5544,11 +5667,16 @@ static int kvmppc_book3s_init_hv(void) | ||||
| 			no_mixing_hpt_and_radix = true; | ||||
| 	} | ||||
| 
 | ||||
| 	r = kvmppc_uvmem_init(); | ||||
| 	if (r < 0) | ||||
| 		pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r); | ||||
| 
 | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_book3s_exit_hv(void) | ||||
| { | ||||
| 	kvmppc_uvmem_free(); | ||||
| 	kvmppc_free_host_rm_ops(); | ||||
| 	if (kvmppc_radix_possible()) | ||||
| 		kvmppc_radix_exit(); | ||||
|  | ||||
| @ -755,6 +755,71 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) | ||||
| 	local_paca->kvm_hstate.kvm_split_mode = NULL; | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_end_cede(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	vcpu->arch.ceded = 0; | ||||
| 	if (vcpu->arch.timer_running) { | ||||
| 		hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | ||||
| 		vcpu->arch.timer_running = 0; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * Check for illegal transactional state bit combination | ||||
| 	 * and if we find it, force the TS field to a safe state. | ||||
| 	 */ | ||||
| 	if ((msr & MSR_TS_MASK) == MSR_TS_MASK) | ||||
| 		msr &= ~MSR_TS_MASK; | ||||
| 	vcpu->arch.shregs.msr = msr; | ||||
| 	kvmppc_end_cede(vcpu); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv); | ||||
| 
 | ||||
| static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) | ||||
| { | ||||
| 	unsigned long msr, pc, new_msr, new_pc; | ||||
| 
 | ||||
| 	msr = kvmppc_get_msr(vcpu); | ||||
| 	pc = kvmppc_get_pc(vcpu); | ||||
| 	new_msr = vcpu->arch.intr_msr; | ||||
| 	new_pc = vec; | ||||
| 
 | ||||
| 	/* If transactional, change to suspend mode on IRQ delivery */ | ||||
| 	if (MSR_TM_TRANSACTIONAL(msr)) | ||||
| 		new_msr |= MSR_TS_S; | ||||
| 	else | ||||
| 		new_msr |= msr & MSR_TS_MASK; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and | ||||
| 	 * applicable. AIL=2 is not supported. | ||||
| 	 * | ||||
| 	 * AIL does not apply to SRESET, MCE, or HMI (which is never | ||||
| 	 * delivered to the guest), and does not apply if IR=0 or DR=0. | ||||
| 	 */ | ||||
| 	if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET && | ||||
| 	    vec != BOOK3S_INTERRUPT_MACHINE_CHECK && | ||||
| 	    (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 && | ||||
| 	    (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) { | ||||
| 		new_msr |= MSR_IR | MSR_DR; | ||||
| 		new_pc += 0xC000000000004000ULL; | ||||
| 	} | ||||
| 
 | ||||
| 	kvmppc_set_srr0(vcpu, pc); | ||||
| 	kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); | ||||
| 	kvmppc_set_pc(vcpu, new_pc); | ||||
| 	vcpu->arch.shregs.msr = new_msr; | ||||
| } | ||||
| 
 | ||||
| void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) | ||||
| { | ||||
| 	inject_interrupt(vcpu, vec, srr1_flags); | ||||
| 	kvmppc_end_cede(vcpu); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv); | ||||
| 
 | ||||
| /*
 | ||||
|  * Is there a PRIV_DOORBELL pending for the guest (on POWER9)? | ||||
|  * Can we inject a Decrementer or a External interrupt? | ||||
| @ -762,7 +827,6 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) | ||||
| void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	int ext; | ||||
| 	unsigned long vec = 0; | ||||
| 	unsigned long lpcr; | ||||
| 
 | ||||
| 	/* Insert EXTERNAL bit into LPCR at the MER bit position */ | ||||
| @ -774,26 +838,16 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) | ||||
| 
 | ||||
| 	if (vcpu->arch.shregs.msr & MSR_EE) { | ||||
| 		if (ext) { | ||||
| 			vec = BOOK3S_INTERRUPT_EXTERNAL; | ||||
| 			inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0); | ||||
| 		} else { | ||||
| 			long int dec = mfspr(SPRN_DEC); | ||||
| 			if (!(lpcr & LPCR_LD)) | ||||
| 				dec = (int) dec; | ||||
| 			if (dec < 0) | ||||
| 				vec = BOOK3S_INTERRUPT_DECREMENTER; | ||||
| 				inject_interrupt(vcpu, | ||||
| 					BOOK3S_INTERRUPT_DECREMENTER, 0); | ||||
| 		} | ||||
| 	} | ||||
| 	if (vec) { | ||||
| 		unsigned long msr, old_msr = vcpu->arch.shregs.msr; | ||||
| 
 | ||||
| 		kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); | ||||
| 		kvmppc_set_srr1(vcpu, old_msr); | ||||
| 		kvmppc_set_pc(vcpu, vec); | ||||
| 		msr = vcpu->arch.intr_msr; | ||||
| 		if (MSR_TM_ACTIVE(old_msr)) | ||||
| 			msr |= MSR_TS_S; | ||||
| 		vcpu->arch.shregs.msr = msr; | ||||
| 	} | ||||
| 
 | ||||
| 	if (vcpu->arch.doorbell_request) { | ||||
| 		mtspr(SPRN_DPDES, 1); | ||||
|  | ||||
| @ -1186,7 +1186,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, | ||||
| forward_to_l1: | ||||
| 	vcpu->arch.fault_dsisr = flags; | ||||
| 	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { | ||||
| 		vcpu->arch.shregs.msr &= ~0x783f0000ul; | ||||
| 		vcpu->arch.shregs.msr &= SRR1_MSR_BITS; | ||||
| 		vcpu->arch.shregs.msr |= flags; | ||||
| 	} | ||||
| 	return RESUME_HOST; | ||||
|  | ||||
							
								
								
									
New file: arch/powerpc/kvm/book3s_hv_uvmem.c (785 lines)
							| @ -0,0 +1,785 @@ | ||||
| // SPDX-License-Identifier: GPL-2.0
 | ||||
| /*
 | ||||
|  * Secure pages management: Migration of pages between normal and secure | ||||
|  * memory of KVM guests. | ||||
|  * | ||||
|  * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com> | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * A pseries guest can be run as secure guest on Ultravisor-enabled | ||||
|  * POWER platforms. On such platforms, this driver will be used to manage | ||||
|  * the movement of guest pages between the normal memory managed by | ||||
|  * hypervisor (HV) and secure memory managed by Ultravisor (UV). | ||||
|  * | ||||
|  * The page-in or page-out requests from UV will come to HV as hcalls and | ||||
|  * HV will call back into UV via ultracalls to satisfy these page requests. | ||||
|  * | ||||
|  * Private ZONE_DEVICE memory equal to the amount of secure memory | ||||
|  * available in the platform for running secure guests is hotplugged. | ||||
|  * Whenever a page belonging to the guest becomes secure, a page from this | ||||
|  * private device memory is used to represent and track that secure page | ||||
|  * on the HV side. Some pages (like virtio buffers, VPA pages etc) are | ||||
|  * shared between UV and HV. However such pages aren't represented by | ||||
|  * device private memory and mappings to shared memory exist in both | ||||
|  * UV and HV page tables. | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Notes on locking | ||||
|  * | ||||
|  * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent | ||||
|  * page-in and page-out requests for the same GPA. Concurrent accesses | ||||
|  * can either come via UV (guest vCPUs requesting the same page) | ||||
|  * or when HV and guest simultaneously access the same page. | ||||
|  * This mutex serializes the migration of page from HV(normal) to | ||||
|  * UV(secure) and vice versa. So the serialization points are around | ||||
|  * migrate_vma routines and page-in/out routines. | ||||
|  * | ||||
|  * Per-guest mutex comes with a cost though. Mainly it serializes the | ||||
|  * fault path as page-out can occur when HV faults on accessing secure | ||||
|  * guest pages. Currently UV issues page-in requests for all the guest | ||||
|  * PFNs one at a time during early boot (UV_ESM uvcall), so this is | ||||
|  * not a cause for concern. Also currently the number of page-outs caused | ||||
|  * by HV touching secure pages is very low. If and when UV supports | ||||
|  * overcommitting, then we might see concurrent guest driven page-outs. | ||||
|  * | ||||
|  * Locking order | ||||
|  * | ||||
|  * 1. kvm->srcu - Protects KVM memslots | ||||
|  * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise | ||||
|  * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting | ||||
|  *			     as sync-points for page-in/out | ||||
|  */ | ||||
| 
 | ||||
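/*
 * Editorial sketch (not part of the patch) of how the ordering above plays
 * out in the H_SVM_PAGE_IN handler further down in this file:
 *
 *	srcu_idx = srcu_read_lock(&kvm->srcu);     (1) memslots
 *	down_write(&kvm->mm->mmap_sem);            (2) write needed for ksm_madvise()
 *	mutex_lock(&kvm->arch.uvmem_lock);         (3) serialize page-in/out of the GPA
 *	... kvmppc_svm_page_in() downgrades mmap_sem to read and migrates the page ...
 *	mutex_unlock(&kvm->arch.uvmem_lock);
 *	up_read(&kvm->mm->mmap_sem);
 *	srcu_read_unlock(&kvm->srcu, srcu_idx);
 */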
| /*
 | ||||
|  * Notes on page size | ||||
|  * | ||||
|  * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN | ||||
|  * and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks | ||||
|  * secure GPAs at 64K page size and maintains one device PFN for each | ||||
|  * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued | ||||
|  * for 64K page at a time. | ||||
|  * | ||||
|  * HV faulting on secure pages: When HV touches any secure page, it | ||||
|  * faults and issues a UV_PAGE_OUT request with 64K page size. Currently | ||||
|  * UV splits and remaps the 2MB page if necessary and copies out the | ||||
|  * required 64K page contents. | ||||
|  * | ||||
|  * Shared pages: Whenever guest shares a secure page, UV will split and | ||||
|  * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size. | ||||
|  * | ||||
|  * HV invalidating a page: When a regular page belonging to secure | ||||
|  * guest gets unmapped, HV informs UV with UV_PAGE_INVAL of 64K | ||||
|  * page size. Using 64K page size is correct here because any non-secure | ||||
|  * page will essentially be of 64K page size. Splitting by UV during sharing | ||||
|  * and page-out ensures this. | ||||
|  * | ||||
|  * Page fault handling: When HV handles page fault of a page belonging | ||||
|  * to secure guest, it sends that to UV with a 64K UV_PAGE_IN request. | ||||
|  * Using 64K size is correct here too as UV would have split the 2MB page | ||||
|  * into 64k mappings and would have done page-outs earlier. | ||||
|  * | ||||
|  * In summary, the current secure pages handling code in HV assumes | ||||
|  * 64K page size and in fact fails any page-in/page-out requests of | ||||
|  * non-64K size upfront. If and when UV starts supporting multiple | ||||
|  * page-sizes, we need to break this assumption. | ||||
|  */ | ||||
| 
 | ||||
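/*
 * Editorial note (not part of the patch): the 64K-only assumption shows up as
 * an explicit check at the top of the page-in/out hcall handlers, e.g.:
 *
 *	if (page_shift != PAGE_SHIFT)
 *		return H_P3;
 */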
| #include <linux/pagemap.h> | ||||
| #include <linux/migrate.h> | ||||
| #include <linux/kvm_host.h> | ||||
| #include <linux/ksm.h> | ||||
| #include <asm/ultravisor.h> | ||||
| #include <asm/mman.h> | ||||
| #include <asm/kvm_ppc.h> | ||||
| 
 | ||||
| static struct dev_pagemap kvmppc_uvmem_pgmap; | ||||
| static unsigned long *kvmppc_uvmem_bitmap; | ||||
| static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock); | ||||
| 
 | ||||
| #define KVMPPC_UVMEM_PFN	(1UL << 63) | ||||
| 
 | ||||
| struct kvmppc_uvmem_slot { | ||||
| 	struct list_head list; | ||||
| 	unsigned long nr_pfns; | ||||
| 	unsigned long base_pfn; | ||||
| 	unsigned long *pfns; | ||||
| }; | ||||
| 
 | ||||
| struct kvmppc_uvmem_page_pvt { | ||||
| 	struct kvm *kvm; | ||||
| 	unsigned long gpa; | ||||
| 	bool skip_page_out; | ||||
| }; | ||||
| 
 | ||||
| int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot) | ||||
| { | ||||
| 	struct kvmppc_uvmem_slot *p; | ||||
| 
 | ||||
| 	p = kzalloc(sizeof(*p), GFP_KERNEL); | ||||
| 	if (!p) | ||||
| 		return -ENOMEM; | ||||
| 	p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns))); | ||||
| 	if (!p->pfns) { | ||||
| 		kfree(p); | ||||
| 		return -ENOMEM; | ||||
| 	} | ||||
| 	p->nr_pfns = slot->npages; | ||||
| 	p->base_pfn = slot->base_gfn; | ||||
| 
 | ||||
| 	mutex_lock(&kvm->arch.uvmem_lock); | ||||
| 	list_add(&p->list, &kvm->arch.uvmem_pfns); | ||||
| 	mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * All device PFNs are already released by the time we come here. | ||||
|  */ | ||||
| void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) | ||||
| { | ||||
| 	struct kvmppc_uvmem_slot *p, *next; | ||||
| 
 | ||||
| 	mutex_lock(&kvm->arch.uvmem_lock); | ||||
| 	list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) { | ||||
| 		if (p->base_pfn == slot->base_gfn) { | ||||
| 			vfree(p->pfns); | ||||
| 			list_del(&p->list); | ||||
| 			kfree(p); | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| 	mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, | ||||
| 				    struct kvm *kvm) | ||||
| { | ||||
| 	struct kvmppc_uvmem_slot *p; | ||||
| 
 | ||||
| 	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { | ||||
| 		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { | ||||
| 			unsigned long index = gfn - p->base_pfn; | ||||
| 
 | ||||
| 			p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN; | ||||
| 			return; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm) | ||||
| { | ||||
| 	struct kvmppc_uvmem_slot *p; | ||||
| 
 | ||||
| 	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { | ||||
| 		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { | ||||
| 			p->pfns[gfn - p->base_pfn] = 0; | ||||
| 			return; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, | ||||
| 				    unsigned long *uvmem_pfn) | ||||
| { | ||||
| 	struct kvmppc_uvmem_slot *p; | ||||
| 
 | ||||
| 	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { | ||||
| 		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { | ||||
| 			unsigned long index = gfn - p->base_pfn; | ||||
| 
 | ||||
| 			if (p->pfns[index] & KVMPPC_UVMEM_PFN) { | ||||
| 				if (uvmem_pfn) | ||||
| 					*uvmem_pfn = p->pfns[index] & | ||||
| 						     ~KVMPPC_UVMEM_PFN; | ||||
| 				return true; | ||||
| 			} else | ||||
| 				return false; | ||||
| 		} | ||||
| 	} | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) | ||||
| { | ||||
| 	struct kvm_memslots *slots; | ||||
| 	struct kvm_memory_slot *memslot; | ||||
| 	int ret = H_SUCCESS; | ||||
| 	int srcu_idx; | ||||
| 
 | ||||
| 	if (!kvmppc_uvmem_bitmap) | ||||
| 		return H_UNSUPPORTED; | ||||
| 
 | ||||
| 	/* Only radix guests can be secure guests */ | ||||
| 	if (!kvm_is_radix(kvm)) | ||||
| 		return H_UNSUPPORTED; | ||||
| 
 | ||||
| 	srcu_idx = srcu_read_lock(&kvm->srcu); | ||||
| 	slots = kvm_memslots(kvm); | ||||
| 	kvm_for_each_memslot(memslot, slots) { | ||||
| 		if (kvmppc_uvmem_slot_init(kvm, memslot)) { | ||||
| 			ret = H_PARAMETER; | ||||
| 			goto out; | ||||
| 		} | ||||
| 		ret = uv_register_mem_slot(kvm->arch.lpid, | ||||
| 					   memslot->base_gfn << PAGE_SHIFT, | ||||
| 					   memslot->npages * PAGE_SIZE, | ||||
| 					   0, memslot->id); | ||||
| 		if (ret < 0) { | ||||
| 			kvmppc_uvmem_slot_free(kvm, memslot); | ||||
| 			ret = H_PARAMETER; | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} | ||||
| 	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START; | ||||
| out: | ||||
| 	srcu_read_unlock(&kvm->srcu, srcu_idx); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) | ||||
| { | ||||
| 	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) | ||||
| 		return H_UNSUPPORTED; | ||||
| 
 | ||||
| 	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; | ||||
| 	pr_info("LPID %d went secure\n", kvm->arch.lpid); | ||||
| 	return H_SUCCESS; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Drop device pages that we maintain for the secure guest | ||||
|  * | ||||
|  * We first mark the pages to be skipped from UV_PAGE_OUT when there | ||||
|  * is HV side fault on these pages. Next we *get* these pages, forcing | ||||
|  * fault on them, do fault time migration to replace the device PTEs in | ||||
|  * QEMU page table with normal PTEs from newly allocated pages. | ||||
|  */ | ||||
| void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, | ||||
| 			     struct kvm *kvm) | ||||
| { | ||||
| 	int i; | ||||
| 	struct kvmppc_uvmem_page_pvt *pvt; | ||||
| 	unsigned long pfn, uvmem_pfn; | ||||
| 	unsigned long gfn = free->base_gfn; | ||||
| 
 | ||||
| 	for (i = free->npages; i; --i, ++gfn) { | ||||
| 		struct page *uvmem_page; | ||||
| 
 | ||||
| 		mutex_lock(&kvm->arch.uvmem_lock); | ||||
| 		if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { | ||||
| 			mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		uvmem_page = pfn_to_page(uvmem_pfn); | ||||
| 		pvt = uvmem_page->zone_device_data; | ||||
| 		pvt->skip_page_out = true; | ||||
| 		mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| 
 | ||||
| 		pfn = gfn_to_pfn(kvm, gfn); | ||||
| 		if (is_error_noslot_pfn(pfn)) | ||||
| 			continue; | ||||
| 		kvm_release_pfn_clean(pfn); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Get a free device PFN from the pool | ||||
|  * | ||||
|  * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device | ||||
|  * PFN will be used to keep track of the secure page on HV side. | ||||
|  * | ||||
|  * Called with kvm->arch.uvmem_lock held | ||||
|  */ | ||||
| static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) | ||||
| { | ||||
| 	struct page *dpage = NULL; | ||||
| 	unsigned long bit, uvmem_pfn; | ||||
| 	struct kvmppc_uvmem_page_pvt *pvt; | ||||
| 	unsigned long pfn_last, pfn_first; | ||||
| 
 | ||||
| 	pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT; | ||||
| 	pfn_last = pfn_first + | ||||
| 		   (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT); | ||||
| 
 | ||||
| 	spin_lock(&kvmppc_uvmem_bitmap_lock); | ||||
| 	bit = find_first_zero_bit(kvmppc_uvmem_bitmap, | ||||
| 				  pfn_last - pfn_first); | ||||
| 	if (bit >= (pfn_last - pfn_first)) | ||||
| 		goto out; | ||||
| 	bitmap_set(kvmppc_uvmem_bitmap, bit, 1); | ||||
| 	spin_unlock(&kvmppc_uvmem_bitmap_lock); | ||||
| 
 | ||||
| 	pvt = kzalloc(sizeof(*pvt), GFP_KERNEL); | ||||
| 	if (!pvt) | ||||
| 		goto out_clear; | ||||
| 
 | ||||
| 	uvmem_pfn = bit + pfn_first; | ||||
| 	kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); | ||||
| 
 | ||||
| 	pvt->gpa = gpa; | ||||
| 	pvt->kvm = kvm; | ||||
| 
 | ||||
| 	dpage = pfn_to_page(uvmem_pfn); | ||||
| 	dpage->zone_device_data = pvt; | ||||
| 	get_page(dpage); | ||||
| 	lock_page(dpage); | ||||
| 	return dpage; | ||||
| out_clear: | ||||
| 	spin_lock(&kvmppc_uvmem_bitmap_lock); | ||||
| 	bitmap_clear(kvmppc_uvmem_bitmap, bit, 1); | ||||
| out: | ||||
| 	spin_unlock(&kvmppc_uvmem_bitmap_lock); | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Alloc a PFN from private device memory pool and copy page from normal | ||||
|  * memory to secure memory using UV_PAGE_IN uvcall. | ||||
|  */ | ||||
| static int | ||||
| kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, | ||||
| 		   unsigned long end, unsigned long gpa, struct kvm *kvm, | ||||
| 		   unsigned long page_shift, bool *downgrade) | ||||
| { | ||||
| 	unsigned long src_pfn, dst_pfn = 0; | ||||
| 	struct migrate_vma mig; | ||||
| 	struct page *spage; | ||||
| 	unsigned long pfn; | ||||
| 	struct page *dpage; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	memset(&mig, 0, sizeof(mig)); | ||||
| 	mig.vma = vma; | ||||
| 	mig.start = start; | ||||
| 	mig.end = end; | ||||
| 	mig.src = &src_pfn; | ||||
| 	mig.dst = &dst_pfn; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We come here with mmap_sem write lock held just for | ||||
| 	 * ksm_madvise(), otherwise we only need read mmap_sem. | ||||
| 	 * Hence downgrade to read lock once ksm_madvise() is done. | ||||
| 	 */ | ||||
| 	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, | ||||
| 			  MADV_UNMERGEABLE, &vma->vm_flags); | ||||
| 	downgrade_write(&kvm->mm->mmap_sem); | ||||
| 	*downgrade = true; | ||||
| 	if (ret) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	ret = migrate_vma_setup(&mig); | ||||
| 	if (ret) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) { | ||||
| 		ret = -1; | ||||
| 		goto out_finalize; | ||||
| 	} | ||||
| 
 | ||||
| 	dpage = kvmppc_uvmem_get_page(gpa, kvm); | ||||
| 	if (!dpage) { | ||||
| 		ret = -1; | ||||
| 		goto out_finalize; | ||||
| 	} | ||||
| 
 | ||||
| 	pfn = *mig.src >> MIGRATE_PFN_SHIFT; | ||||
| 	spage = migrate_pfn_to_page(*mig.src); | ||||
| 	if (spage) | ||||
| 		uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, | ||||
| 			   page_shift); | ||||
| 
 | ||||
| 	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; | ||||
| 	migrate_vma_pages(&mig); | ||||
| out_finalize: | ||||
| 	migrate_vma_finalize(&mig); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Shares the page with HV, thus making it a normal page. | ||||
|  * | ||||
|  * - If the page is already secure, then provision a new page and share | ||||
|  * - If the page is a normal page, share the existing page | ||||
|  * | ||||
|  * In the former case, the dev_pagemap_ops.migrate_to_ram handler is used | ||||
|  * to unmap the device page from QEMU's page tables. | ||||
|  */ | ||||
| static unsigned long | ||||
| kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) | ||||
| { | ||||
| 
 | ||||
| 	int ret = H_PARAMETER; | ||||
| 	struct page *uvmem_page; | ||||
| 	struct kvmppc_uvmem_page_pvt *pvt; | ||||
| 	unsigned long pfn; | ||||
| 	unsigned long gfn = gpa >> page_shift; | ||||
| 	int srcu_idx; | ||||
| 	unsigned long uvmem_pfn; | ||||
| 
 | ||||
| 	srcu_idx = srcu_read_lock(&kvm->srcu); | ||||
| 	mutex_lock(&kvm->arch.uvmem_lock); | ||||
| 	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { | ||||
| 		uvmem_page = pfn_to_page(uvmem_pfn); | ||||
| 		pvt = uvmem_page->zone_device_data; | ||||
| 		pvt->skip_page_out = true; | ||||
| 	} | ||||
| 
 | ||||
| retry: | ||||
| 	mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| 	pfn = gfn_to_pfn(kvm, gfn); | ||||
| 	if (is_error_noslot_pfn(pfn)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	mutex_lock(&kvm->arch.uvmem_lock); | ||||
| 	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { | ||||
| 		uvmem_page = pfn_to_page(uvmem_pfn); | ||||
| 		pvt = uvmem_page->zone_device_data; | ||||
| 		pvt->skip_page_out = true; | ||||
| 		kvm_release_pfn_clean(pfn); | ||||
| 		goto retry; | ||||
| 	} | ||||
| 
 | ||||
| 	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift)) | ||||
| 		ret = H_SUCCESS; | ||||
| 	kvm_release_pfn_clean(pfn); | ||||
| 	mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| out: | ||||
| 	srcu_read_unlock(&kvm->srcu, srcu_idx); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * H_SVM_PAGE_IN: Move page from normal memory to secure memory. | ||||
|  * | ||||
|  * H_PAGE_IN_SHARED flag makes the page shared, which means that the same | ||||
|  * memory is visible from both UV and HV. | ||||
|  */ | ||||
| unsigned long | ||||
| kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, | ||||
| 		     unsigned long flags, unsigned long page_shift) | ||||
| { | ||||
| 	bool downgrade = false; | ||||
| 	unsigned long start, end; | ||||
| 	struct vm_area_struct *vma; | ||||
| 	int srcu_idx; | ||||
| 	unsigned long gfn = gpa >> page_shift; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) | ||||
| 		return H_UNSUPPORTED; | ||||
| 
 | ||||
| 	if (page_shift != PAGE_SHIFT) | ||||
| 		return H_P3; | ||||
| 
 | ||||
| 	if (flags & ~H_PAGE_IN_SHARED) | ||||
| 		return H_P2; | ||||
| 
 | ||||
| 	if (flags & H_PAGE_IN_SHARED) | ||||
| 		return kvmppc_share_page(kvm, gpa, page_shift); | ||||
| 
 | ||||
| 	ret = H_PARAMETER; | ||||
| 	srcu_idx = srcu_read_lock(&kvm->srcu); | ||||
| 	down_write(&kvm->mm->mmap_sem); | ||||
| 
 | ||||
| 	start = gfn_to_hva(kvm, gfn); | ||||
| 	if (kvm_is_error_hva(start)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	mutex_lock(&kvm->arch.uvmem_lock); | ||||
| 	/* Fail the page-in request of an already paged-in page */ | ||||
| 	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL)) | ||||
| 		goto out_unlock; | ||||
| 
 | ||||
| 	end = start + (1UL << page_shift); | ||||
| 	vma = find_vma_intersection(kvm->mm, start, end); | ||||
| 	if (!vma || vma->vm_start > start || vma->vm_end < end) | ||||
| 		goto out_unlock; | ||||
| 
 | ||||
| 	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, | ||||
| 				&downgrade)) | ||||
| 		ret = H_SUCCESS; | ||||
| out_unlock: | ||||
| 	mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| out: | ||||
| 	if (downgrade) | ||||
| 		up_read(&kvm->mm->mmap_sem); | ||||
| 	else | ||||
| 		up_write(&kvm->mm->mmap_sem); | ||||
| 	srcu_read_unlock(&kvm->srcu, srcu_idx); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Provision a new page on HV side and copy over the contents | ||||
|  * from secure memory using UV_PAGE_OUT uvcall. | ||||
|  */ | ||||
| static int | ||||
| kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, | ||||
| 		    unsigned long end, unsigned long page_shift, | ||||
| 		    struct kvm *kvm, unsigned long gpa) | ||||
| { | ||||
| 	unsigned long src_pfn, dst_pfn = 0; | ||||
| 	struct migrate_vma mig; | ||||
| 	struct page *dpage, *spage; | ||||
| 	struct kvmppc_uvmem_page_pvt *pvt; | ||||
| 	unsigned long pfn; | ||||
| 	int ret = U_SUCCESS; | ||||
| 
 | ||||
| 	memset(&mig, 0, sizeof(mig)); | ||||
| 	mig.vma = vma; | ||||
| 	mig.start = start; | ||||
| 	mig.end = end; | ||||
| 	mig.src = &src_pfn; | ||||
| 	mig.dst = &dst_pfn; | ||||
| 
 | ||||
| 	mutex_lock(&kvm->arch.uvmem_lock); | ||||
| 	/* The requested page is already paged-out, nothing to do */ | ||||
| 	if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	ret = migrate_vma_setup(&mig); | ||||
| 	if (ret) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	spage = migrate_pfn_to_page(*mig.src); | ||||
| 	if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) | ||||
| 		goto out_finalize; | ||||
| 
 | ||||
| 	if (!is_zone_device_page(spage)) | ||||
| 		goto out_finalize; | ||||
| 
 | ||||
| 	dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); | ||||
| 	if (!dpage) { | ||||
| 		ret = -1; | ||||
| 		goto out_finalize; | ||||
| 	} | ||||
| 
 | ||||
| 	lock_page(dpage); | ||||
| 	pvt = spage->zone_device_data; | ||||
| 	pfn = page_to_pfn(dpage); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * This function is used in two cases: | ||||
| 	 * - When HV touches a secure page, for which we do UV_PAGE_OUT | ||||
| 	 * - When a secure page is converted to shared page, we *get* | ||||
| 	 *   the page to essentially unmap the device page. In this | ||||
| 	 *   case we skip page-out. | ||||
| 	 */ | ||||
| 	if (!pvt->skip_page_out) | ||||
| 		ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, | ||||
| 				  gpa, 0, page_shift); | ||||
| 
 | ||||
| 	if (ret == U_SUCCESS) | ||||
| 		*mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; | ||||
| 	else { | ||||
| 		unlock_page(dpage); | ||||
| 		__free_page(dpage); | ||||
| 		goto out_finalize; | ||||
| 	} | ||||
| 
 | ||||
| 	migrate_vma_pages(&mig); | ||||
| out_finalize: | ||||
| 	migrate_vma_finalize(&mig); | ||||
| out: | ||||
| 	mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Fault handler callback that gets called when HV touches any page that | ||||
|  * has been moved to secure memory. We ask UV to give the page back by | ||||
|  * issuing the UV_PAGE_OUT uvcall. | ||||
|  * | ||||
|  * This eventually results in dropping of device PFN and the newly | ||||
|  * provisioned page/PFN gets populated in QEMU page tables. | ||||
|  */ | ||||
| static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) | ||||
| { | ||||
| 	struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data; | ||||
| 
 | ||||
| 	if (kvmppc_svm_page_out(vmf->vma, vmf->address, | ||||
| 				vmf->address + PAGE_SIZE, PAGE_SHIFT, | ||||
| 				pvt->kvm, pvt->gpa)) | ||||
| 		return VM_FAULT_SIGBUS; | ||||
| 	else | ||||
| 		return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Release the device PFN back to the pool | ||||
|  * | ||||
|  * Gets called when a secure page becomes a normal page during H_SVM_PAGE_OUT. | ||||
|  * Gets called with kvm->arch.uvmem_lock held. | ||||
|  */ | ||||
| static void kvmppc_uvmem_page_free(struct page *page) | ||||
| { | ||||
| 	unsigned long pfn = page_to_pfn(page) - | ||||
| 			(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT); | ||||
| 	struct kvmppc_uvmem_page_pvt *pvt; | ||||
| 
 | ||||
| 	spin_lock(&kvmppc_uvmem_bitmap_lock); | ||||
| 	bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1); | ||||
| 	spin_unlock(&kvmppc_uvmem_bitmap_lock); | ||||
| 
 | ||||
| 	pvt = page->zone_device_data; | ||||
| 	page->zone_device_data = NULL; | ||||
| 	kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); | ||||
| 	kfree(pvt); | ||||
| } | ||||
| 
 | ||||
| static const struct dev_pagemap_ops kvmppc_uvmem_ops = { | ||||
| 	.page_free = kvmppc_uvmem_page_free, | ||||
| 	.migrate_to_ram	= kvmppc_uvmem_migrate_to_ram, | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * H_SVM_PAGE_OUT: Move page from secure memory to normal memory. | ||||
|  */ | ||||
| unsigned long | ||||
| kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa, | ||||
| 		      unsigned long flags, unsigned long page_shift) | ||||
| { | ||||
| 	unsigned long gfn = gpa >> page_shift; | ||||
| 	unsigned long start, end; | ||||
| 	struct vm_area_struct *vma; | ||||
| 	int srcu_idx; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) | ||||
| 		return H_UNSUPPORTED; | ||||
| 
 | ||||
| 	if (page_shift != PAGE_SHIFT) | ||||
| 		return H_P3; | ||||
| 
 | ||||
| 	if (flags) | ||||
| 		return H_P2; | ||||
| 
 | ||||
| 	ret = H_PARAMETER; | ||||
| 	srcu_idx = srcu_read_lock(&kvm->srcu); | ||||
| 	down_read(&kvm->mm->mmap_sem); | ||||
| 	start = gfn_to_hva(kvm, gfn); | ||||
| 	if (kvm_is_error_hva(start)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	end = start + (1UL << page_shift); | ||||
| 	vma = find_vma_intersection(kvm->mm, start, end); | ||||
| 	if (!vma || vma->vm_start > start || vma->vm_end < end) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa)) | ||||
| 		ret = H_SUCCESS; | ||||
| out: | ||||
| 	up_read(&kvm->mm->mmap_sem); | ||||
| 	srcu_read_unlock(&kvm->srcu, srcu_idx); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) | ||||
| { | ||||
| 	unsigned long pfn; | ||||
| 	int ret = U_SUCCESS; | ||||
| 
 | ||||
| 	pfn = gfn_to_pfn(kvm, gfn); | ||||
| 	if (is_error_noslot_pfn(pfn)) | ||||
| 		return -EFAULT; | ||||
| 
 | ||||
| 	mutex_lock(&kvm->arch.uvmem_lock); | ||||
| 	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT, | ||||
| 			 0, PAGE_SHIFT); | ||||
| out: | ||||
| 	kvm_release_pfn_clean(pfn); | ||||
| 	mutex_unlock(&kvm->arch.uvmem_lock); | ||||
| 	return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT; | ||||
| } | ||||
| 
 | ||||
| static u64 kvmppc_get_secmem_size(void) | ||||
| { | ||||
| 	struct device_node *np; | ||||
| 	int i, len; | ||||
| 	const __be32 *prop; | ||||
| 	u64 size = 0; | ||||
| 
 | ||||
| 	np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware"); | ||||
| 	if (!np) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	prop = of_get_property(np, "secure-memory-ranges", &len); | ||||
| 	if (!prop) | ||||
| 		goto out_put; | ||||
| 
 | ||||
| 	for (i = 0; i < len / (sizeof(*prop) * 4); i++) | ||||
| 		size += of_read_number(prop + (i * 4) + 2, 2); | ||||
| 
 | ||||
| out_put: | ||||
| 	of_node_put(np); | ||||
| out: | ||||
| 	return size; | ||||
| } | ||||
| 
 | ||||
| int kvmppc_uvmem_init(void) | ||||
| { | ||||
| 	int ret = 0; | ||||
| 	unsigned long size; | ||||
| 	struct resource *res; | ||||
| 	void *addr; | ||||
| 	unsigned long pfn_last, pfn_first; | ||||
| 
 | ||||
| 	size = kvmppc_get_secmem_size(); | ||||
| 	if (!size) { | ||||
| 		/*
 | ||||
| 		 * Don't fail the initialization of the kvm-hv module if | ||||
| 		 * the platform doesn't export the ibm,uv-firmware node. | ||||
| 		 * Let normal guests run on such a PEF-disabled platform. | ||||
| 		 */ | ||||
| 		pr_info("KVMPPC-UVMEM: No support for secure guests\n"); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem"); | ||||
| 	if (IS_ERR(res)) { | ||||
| 		ret = PTR_ERR(res); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; | ||||
| 	kvmppc_uvmem_pgmap.res = *res; | ||||
| 	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; | ||||
| 	addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE); | ||||
| 	if (IS_ERR(addr)) { | ||||
| 		ret = PTR_ERR(addr); | ||||
| 		goto out_free_region; | ||||
| 	} | ||||
| 
 | ||||
| 	pfn_first = res->start >> PAGE_SHIFT; | ||||
| 	pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT); | ||||
| 	kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first), | ||||
| 				      sizeof(unsigned long), GFP_KERNEL); | ||||
| 	if (!kvmppc_uvmem_bitmap) { | ||||
| 		ret = -ENOMEM; | ||||
| 		goto out_unmap; | ||||
| 	} | ||||
| 
 | ||||
| 	pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size); | ||||
| 	return ret; | ||||
| out_unmap: | ||||
| 	memunmap_pages(&kvmppc_uvmem_pgmap); | ||||
| out_free_region: | ||||
| 	release_mem_region(res->start, size); | ||||
| out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| void kvmppc_uvmem_free(void) | ||||
| { | ||||
| 	memunmap_pages(&kvmppc_uvmem_pgmap); | ||||
| 	release_mem_region(kvmppc_uvmem_pgmap.res.start, | ||||
| 			   resource_size(&kvmppc_uvmem_pgmap.res)); | ||||
| 	kfree(kvmppc_uvmem_bitmap); | ||||
| } | ||||
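
For reference, the allocation pattern used by kvmppc_uvmem_get_page() and kvmppc_uvmem_page_free() above — claim the first free bit in a bitmap under a lock, clear it on free — can be sketched in isolation as below. This is a stand-alone user-space sketch, not kernel code: the names NR_UVMEM_PFNS, uvmem_alloc_pfn() and uvmem_free_pfn() are invented for the illustration, and a pthread mutex stands in for kvmppc_uvmem_bitmap_lock.

/* Hypothetical stand-alone sketch of the device-PFN bookkeeping above. */
#include <pthread.h>
#include <stdio.h>

#define NR_UVMEM_PFNS 64          /* assumed pool size */

static unsigned long bitmap;      /* one bit per device PFN */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Return a free PFN index, or -1 if the pool is exhausted. */
static int uvmem_alloc_pfn(void)
{
	int bit = -1;

	pthread_mutex_lock(&lock);
	for (int i = 0; i < NR_UVMEM_PFNS; i++) {
		if (!(bitmap & (1UL << i))) {
			bitmap |= 1UL << i;   /* bitmap_set(..., bit, 1) */
			bit = i;
			break;
		}
	}
	pthread_mutex_unlock(&lock);
	return bit;
}

/* Mirror of kvmppc_uvmem_page_free(): release the PFN back to the pool. */
static void uvmem_free_pfn(int bit)
{
	pthread_mutex_lock(&lock);
	bitmap &= ~(1UL << bit);          /* bitmap_clear(..., bit, 1) */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	int a = uvmem_alloc_pfn();
	int b = uvmem_alloc_pfn();

	printf("allocated %d and %d\n", a, b);
	uvmem_free_pfn(a);
	uvmem_free_pfn(b);
	return 0;
}

In the kernel the same effect is obtained with find_first_zero_bit(), bitmap_set() and bitmap_clear() under kvmppc_uvmem_bitmap_lock, with the extra step of attaching the per-page private data to the zone device page.
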
| @ -90,7 +90,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) | ||||
| 	kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); | ||||
| } | ||||
| 
 | ||||
| void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); | ||||
| static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { | ||||
| 		ulong pc = kvmppc_get_pc(vcpu); | ||||
| 		ulong lr = kvmppc_get_lr(vcpu); | ||||
| 		if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) | ||||
| 			kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); | ||||
| 		if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) | ||||
| 			kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); | ||||
| 		vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) | ||||
| { | ||||
| 	unsigned long msr, pc, new_msr, new_pc; | ||||
| 
 | ||||
| 	kvmppc_unfixup_split_real(vcpu); | ||||
| 
 | ||||
| 	msr = kvmppc_get_msr(vcpu); | ||||
| 	pc = kvmppc_get_pc(vcpu); | ||||
| 	new_msr = vcpu->arch.intr_msr; | ||||
| 	new_pc = to_book3s(vcpu)->hior + vec; | ||||
| 
 | ||||
| #ifdef CONFIG_PPC_BOOK3S_64 | ||||
| 	/* If transactional, change to suspend mode on IRQ delivery */ | ||||
| 	if (MSR_TM_TRANSACTIONAL(msr)) | ||||
| 		new_msr |= MSR_TS_S; | ||||
| 	else | ||||
| 		new_msr |= msr & MSR_TS_MASK; | ||||
| #endif | ||||
| 
 | ||||
| 	kvmppc_set_srr0(vcpu, pc); | ||||
| 	kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); | ||||
| 	kvmppc_set_pc(vcpu, new_pc); | ||||
| 	kvmppc_set_msr(vcpu, new_msr); | ||||
| } | ||||
| 
 | ||||
| static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) | ||||
| { | ||||
| @ -1761,6 +1797,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, | ||||
| #else | ||||
| 	/* default to book3s_32 (750) */ | ||||
| 	vcpu->arch.pvr = 0x84202; | ||||
| 	vcpu->arch.intr_msr = 0; | ||||
| #endif | ||||
| 	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); | ||||
| 	vcpu->arch.slb_nr = 64; | ||||
| @ -2058,6 +2095,7 @@ static struct kvmppc_ops kvm_ops_pr = { | ||||
| 	.set_one_reg = kvmppc_set_one_reg_pr, | ||||
| 	.vcpu_load   = kvmppc_core_vcpu_load_pr, | ||||
| 	.vcpu_put    = kvmppc_core_vcpu_put_pr, | ||||
| 	.inject_interrupt = kvmppc_inject_interrupt_pr, | ||||
| 	.set_msr     = kvmppc_set_msr_pr, | ||||
| 	.vcpu_run    = kvmppc_vcpu_run_pr, | ||||
| 	.vcpu_create = kvmppc_core_vcpu_create_pr, | ||||
|  | ||||
| @ -1211,6 +1211,45 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) | ||||
| 	vcpu->arch.xive_vcpu = NULL; | ||||
| } | ||||
| 
 | ||||
| static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) | ||||
| { | ||||
| 	/* We have a block of xive->nr_servers VPs. We just need to check
 | ||||
| 	 * that raw vCPU ids are below the expected limit for this guest's | ||||
| 	 * core stride; kvmppc_pack_vcpu_id() will pack them down to an | ||||
| 	 * index that can be safely used to compute a VP id that belongs | ||||
| 	 * to the VP block. | ||||
| 	 */ | ||||
| 	return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; | ||||
| } | ||||
| 
 | ||||
| int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) | ||||
| { | ||||
| 	u32 vp_id; | ||||
| 
 | ||||
| 	if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) { | ||||
| 		pr_devel("Out of bounds !\n"); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	if (xive->vp_base == XIVE_INVALID_VP) { | ||||
| 		xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers); | ||||
| 		pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers); | ||||
| 
 | ||||
| 		if (xive->vp_base == XIVE_INVALID_VP) | ||||
| 			return -ENOSPC; | ||||
| 	} | ||||
| 
 | ||||
| 	vp_id = kvmppc_xive_vp(xive, cpu); | ||||
| 	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { | ||||
| 		pr_devel("Duplicate !\n"); | ||||
| 		return -EEXIST; | ||||
| 	} | ||||
| 
 | ||||
| 	*vp = vp_id; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | ||||
| 			     struct kvm_vcpu *vcpu, u32 cpu) | ||||
| { | ||||
| @ -1229,20 +1268,13 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | ||||
| 		return -EPERM; | ||||
| 	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) | ||||
| 		return -EBUSY; | ||||
| 	if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { | ||||
| 		pr_devel("Out of bounds !\n"); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	/* We need to synchronize with queue provisioning */ | ||||
| 	mutex_lock(&xive->lock); | ||||
| 
 | ||||
| 	vp_id = kvmppc_xive_vp(xive, cpu); | ||||
| 	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { | ||||
| 		pr_devel("Duplicate !\n"); | ||||
| 		r = -EEXIST; | ||||
| 	r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id); | ||||
| 	if (r) | ||||
| 		goto bail; | ||||
| 	} | ||||
| 
 | ||||
| 	xc = kzalloc(sizeof(*xc), GFP_KERNEL); | ||||
| 	if (!xc) { | ||||
| @ -1834,6 +1866,43 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr) | ||||
| { | ||||
| 	u32 __user *ubufp = (u32 __user *) addr; | ||||
| 	u32 nr_servers; | ||||
| 	int rc = 0; | ||||
| 
 | ||||
| 	if (get_user(nr_servers, ubufp)) | ||||
| 		return -EFAULT; | ||||
| 
 | ||||
| 	pr_devel("%s nr_servers=%u\n", __func__, nr_servers); | ||||
| 
 | ||||
| 	if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	mutex_lock(&xive->lock); | ||||
| 	if (xive->vp_base != XIVE_INVALID_VP) | ||||
| 		/* The VP block is allocated once and freed when the device
 | ||||
| 		 * is released. Better not to allow changing its size, since it is | ||||
| 		 * used by connect_vcpu to validate vCPU ids (e.g., | ||||
| 		 * setting it back to a higher value could allow connect_vcpu | ||||
| 		 * to come up with a VP id that goes beyond the VP block, which | ||||
| 		 * is likely to cause a crash in OPAL). | ||||
| 		 */ | ||||
| 		rc = -EBUSY; | ||||
| 	else if (nr_servers > KVM_MAX_VCPUS) | ||||
| 		/* We don't need more servers. Higher vCPU ids get packed
 | ||||
| 		 * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id(). | ||||
| 		 */ | ||||
| 		xive->nr_servers = KVM_MAX_VCPUS; | ||||
| 	else | ||||
| 		xive->nr_servers = nr_servers; | ||||
| 
 | ||||
| 	mutex_unlock(&xive->lock); | ||||
| 
 | ||||
| 	return rc; | ||||
| } | ||||
| 
 | ||||
| static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||||
| { | ||||
| 	struct kvmppc_xive *xive = dev->private; | ||||
| @ -1842,6 +1911,11 @@ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||||
| 	switch (attr->group) { | ||||
| 	case KVM_DEV_XICS_GRP_SOURCES: | ||||
| 		return xive_set_source(xive, attr->attr, attr->addr); | ||||
| 	case KVM_DEV_XICS_GRP_CTRL: | ||||
| 		switch (attr->attr) { | ||||
| 		case KVM_DEV_XICS_NR_SERVERS: | ||||
| 			return kvmppc_xive_set_nr_servers(xive, attr->addr); | ||||
| 		} | ||||
| 	} | ||||
| 	return -ENXIO; | ||||
| } | ||||
| @ -1867,6 +1941,11 @@ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||||
| 		    attr->attr < KVMPPC_XICS_NR_IRQS) | ||||
| 			return 0; | ||||
| 		break; | ||||
| 	case KVM_DEV_XICS_GRP_CTRL: | ||||
| 		switch (attr->attr) { | ||||
| 		case KVM_DEV_XICS_NR_SERVERS: | ||||
| 			return 0; | ||||
| 		} | ||||
| 	} | ||||
| 	return -ENXIO; | ||||
| } | ||||
| @ -2001,10 +2080,13 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | ||||
| { | ||||
| 	struct kvmppc_xive *xive; | ||||
| 	struct kvm *kvm = dev->kvm; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	pr_devel("Creating xive for partition\n"); | ||||
| 
 | ||||
| 	/* Already there ? */ | ||||
| 	if (kvm->arch.xive) | ||||
| 		return -EEXIST; | ||||
| 
 | ||||
| 	xive = kvmppc_xive_get_device(kvm, type); | ||||
| 	if (!xive) | ||||
| 		return -ENOMEM; | ||||
| @ -2014,12 +2096,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | ||||
| 	xive->kvm = kvm; | ||||
| 	mutex_init(&xive->lock); | ||||
| 
 | ||||
| 	/* Already there ? */ | ||||
| 	if (kvm->arch.xive) | ||||
| 		ret = -EEXIST; | ||||
| 	else | ||||
| 		kvm->arch.xive = xive; | ||||
| 
 | ||||
| 	/* We use the default queue size set by the host */ | ||||
| 	xive->q_order = xive_native_default_eq_shift(); | ||||
| 	if (xive->q_order < PAGE_SHIFT) | ||||
| @ -2027,18 +2103,16 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | ||||
| 	else | ||||
| 		xive->q_page_order = xive->q_order - PAGE_SHIFT; | ||||
| 
 | ||||
| 	/* Allocate a bunch of VPs */ | ||||
| 	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); | ||||
| 	pr_devel("VP_Base=%x\n", xive->vp_base); | ||||
| 
 | ||||
| 	if (xive->vp_base == XIVE_INVALID_VP) | ||||
| 		ret = -ENOMEM; | ||||
| 	/* VP allocation is delayed to the first call to connect_vcpu */ | ||||
| 	xive->vp_base = XIVE_INVALID_VP; | ||||
| 	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
 | ||||
| 	 * on a POWER9 system. | ||||
| 	 */ | ||||
| 	xive->nr_servers = KVM_MAX_VCPUS; | ||||
| 
 | ||||
| 	xive->single_escalation = xive_native_has_single_escalation(); | ||||
| 
 | ||||
| 	if (ret) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	kvm->arch.xive = xive; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -2108,9 +2182,9 @@ static int xive_debug_show(struct seq_file *m, void *private) | ||||
| 		if (!xc) | ||||
| 			continue; | ||||
| 
 | ||||
| 		seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x" | ||||
| 		seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x" | ||||
| 			   " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", | ||||
| 			   xc->server_num, xc->cppr, xc->hw_cppr, | ||||
| 			   xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr, | ||||
| 			   xc->mfrr, xc->pending, | ||||
| 			   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); | ||||
| 
 | ||||
|  | ||||
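
The two small policies introduced in this hunk — accept a raw vCPU id only if it is below nr_servers * emul_smt_mode, and clamp a requested nr_servers to KVM_MAX_VCPUS — can be sketched in isolation as follows. This is a hedged stand-alone sketch: KVM_MAX_VCPUS_SKETCH, vcpu_id_valid() and set_nr_servers() are invented stand-ins for the kernel's KVM_MAX_VCPUS, kvmppc_xive_vcpu_id_valid() and kvmppc_xive_set_nr_servers().

/* Hypothetical sketch of the vCPU id bound and nr_servers clamp above. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define KVM_MAX_VCPUS_SKETCH 1024

/* Raw vCPU ids must stay below nr_servers * emul_smt_mode so the packed
 * index always falls inside the VP block of nr_servers entries. */
static bool vcpu_id_valid(uint32_t cpu, uint32_t nr_servers, uint32_t emul_smt_mode)
{
	return cpu < nr_servers * emul_smt_mode;
}

/* Reject 0 or anything above the maximum vCPU id; clamp to the sketch's
 * KVM_MAX_VCPUS since higher ids get packed down anyway. */
static int set_nr_servers(uint32_t *nr_servers, uint32_t requested, uint32_t max_vcpu_id)
{
	if (!requested || requested > max_vcpu_id)
		return -1;                    /* -EINVAL in the kernel */
	*nr_servers = requested > KVM_MAX_VCPUS_SKETCH ?
		      KVM_MAX_VCPUS_SKETCH : requested;
	return 0;
}

int main(void)
{
	uint32_t nr_servers = 0;

	set_nr_servers(&nr_servers, 8, 2048);
	printf("vcpu 60 valid: %d\n", vcpu_id_valid(60, nr_servers, 8));
	printf("vcpu 64 valid: %d\n", vcpu_id_valid(64, nr_servers, 8));
	return 0;
}

With nr_servers = 8 and an emulated SMT mode of 8, ids 0 through 63 are accepted and 64 is rejected, matching the bound checked by kvmppc_xive_vcpu_id_valid().
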
| @ -135,6 +135,9 @@ struct kvmppc_xive { | ||||
| 	/* Flags */ | ||||
| 	u8	single_escalation; | ||||
| 
 | ||||
| 	/* Number of entries in the VP block */ | ||||
| 	u32	nr_servers; | ||||
| 
 | ||||
| 	struct kvmppc_xive_ops *ops; | ||||
| 	struct address_space   *mapping; | ||||
| 	struct mutex mapping_lock; | ||||
| @ -296,6 +299,8 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, | ||||
| struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); | ||||
| void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, | ||||
| 				    struct kvmppc_xive_vcpu *xc, int irq); | ||||
| int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); | ||||
| int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); | ||||
| 
 | ||||
| #endif /* CONFIG_KVM_XICS */ | ||||
| #endif /* _KVM_PPC_BOOK3S_XICS_H */ | ||||
|  | ||||
| @ -50,6 +50,24 @@ static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q, | ||||
| 					      u8 prio, __be32 *qpage, | ||||
| 					      u32 order, bool can_escalate) | ||||
| { | ||||
| 	int rc; | ||||
| 	__be32 *qpage_prev = q->qpage; | ||||
| 
 | ||||
| 	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order, | ||||
| 					 can_escalate); | ||||
| 	if (rc) | ||||
| 		return rc; | ||||
| 
 | ||||
| 	if (qpage_prev) | ||||
| 		put_page(virt_to_page(qpage_prev)); | ||||
| 
 | ||||
| 	return rc; | ||||
| } | ||||
| 
 | ||||
| void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||||
| @ -118,19 +136,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, | ||||
| 		return -EPERM; | ||||
| 	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) | ||||
| 		return -EBUSY; | ||||
| 	if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { | ||||
| 		pr_devel("Out of bounds !\n"); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	mutex_lock(&xive->lock); | ||||
| 
 | ||||
| 	vp_id = kvmppc_xive_vp(xive, server_num); | ||||
| 	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { | ||||
| 		pr_devel("Duplicate !\n"); | ||||
| 		rc = -EEXIST; | ||||
| 	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id); | ||||
| 	if (rc) | ||||
| 		goto bail; | ||||
| 	} | ||||
| 
 | ||||
| 	xc = kzalloc(sizeof(*xc), GFP_KERNEL); | ||||
| 	if (!xc) { | ||||
| @ -582,19 +593,14 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, | ||||
| 		q->guest_qaddr  = 0; | ||||
| 		q->guest_qshift = 0; | ||||
| 
 | ||||
| 		rc = xive_native_configure_queue(xc->vp_id, q, priority, | ||||
| 						 NULL, 0, true); | ||||
| 		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, | ||||
| 							NULL, 0, true); | ||||
| 		if (rc) { | ||||
| 			pr_err("Failed to reset queue %d for VCPU %d: %d\n", | ||||
| 			       priority, xc->server_num, rc); | ||||
| 			return rc; | ||||
| 		} | ||||
| 
 | ||||
| 		if (q->qpage) { | ||||
| 			put_page(virt_to_page(q->qpage)); | ||||
| 			q->qpage = NULL; | ||||
| 		} | ||||
| 
 | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| @ -624,12 +630,6 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, | ||||
| 
 | ||||
| 	srcu_idx = srcu_read_lock(&kvm->srcu); | ||||
| 	gfn = gpa_to_gfn(kvm_eq.qaddr); | ||||
| 	page = gfn_to_page(kvm, gfn); | ||||
| 	if (is_error_page(page)) { | ||||
| 		srcu_read_unlock(&kvm->srcu, srcu_idx); | ||||
| 		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	page_size = kvm_host_page_size(kvm, gfn); | ||||
| 	if (1ull << kvm_eq.qshift > page_size) { | ||||
| @ -638,6 +638,13 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	page = gfn_to_page(kvm, gfn); | ||||
| 	if (is_error_page(page)) { | ||||
| 		srcu_read_unlock(&kvm->srcu, srcu_idx); | ||||
| 		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); | ||||
| 	srcu_read_unlock(&kvm->srcu, srcu_idx); | ||||
| 
 | ||||
| @ -653,8 +660,8 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, | ||||
| 	  * OPAL level because the use of END ESBs is not supported by | ||||
| 	  * Linux. | ||||
| 	  */ | ||||
| 	rc = xive_native_configure_queue(xc->vp_id, q, priority, | ||||
| 					 (__be32 *) qaddr, kvm_eq.qshift, true); | ||||
| 	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, | ||||
| 					(__be32 *) qaddr, kvm_eq.qshift, true); | ||||
| 	if (rc) { | ||||
| 		pr_err("Failed to configure queue %d for VCPU %d: %d\n", | ||||
| 		       priority, xc->server_num, rc); | ||||
| @ -928,6 +935,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, | ||||
| 			return kvmppc_xive_reset(xive); | ||||
| 		case KVM_DEV_XIVE_EQ_SYNC: | ||||
| 			return kvmppc_xive_native_eq_sync(xive); | ||||
| 		case KVM_DEV_XIVE_NR_SERVERS: | ||||
| 			return kvmppc_xive_set_nr_servers(xive, attr->addr); | ||||
| 		} | ||||
| 		break; | ||||
| 	case KVM_DEV_XIVE_GRP_SOURCE: | ||||
| @ -967,6 +976,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, | ||||
| 		switch (attr->attr) { | ||||
| 		case KVM_DEV_XIVE_RESET: | ||||
| 		case KVM_DEV_XIVE_EQ_SYNC: | ||||
| 		case KVM_DEV_XIVE_NR_SERVERS: | ||||
| 			return 0; | ||||
| 		} | ||||
| 		break; | ||||
| @ -1067,7 +1077,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) | ||||
| { | ||||
| 	struct kvmppc_xive *xive; | ||||
| 	struct kvm *kvm = dev->kvm; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	pr_devel("Creating xive native device\n"); | ||||
| 
 | ||||
| @ -1081,27 +1090,20 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) | ||||
| 	dev->private = xive; | ||||
| 	xive->dev = dev; | ||||
| 	xive->kvm = kvm; | ||||
| 	kvm->arch.xive = xive; | ||||
| 	mutex_init(&xive->mapping_lock); | ||||
| 	mutex_init(&xive->lock); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for | ||||
| 	 * a default. Getting the max number of CPUs the VM was | ||||
| 	 * configured with would improve our usage of the XIVE VP space. | ||||
| 	/* VP allocation is delayed to the first call to connect_vcpu */ | ||||
| 	xive->vp_base = XIVE_INVALID_VP; | ||||
| 	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
 | ||||
| 	 * on a POWER9 system. | ||||
| 	 */ | ||||
| 	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); | ||||
| 	pr_devel("VP_Base=%x\n", xive->vp_base); | ||||
| 
 | ||||
| 	if (xive->vp_base == XIVE_INVALID_VP) | ||||
| 		ret = -ENXIO; | ||||
| 	xive->nr_servers = KVM_MAX_VCPUS; | ||||
| 
 | ||||
| 	xive->single_escalation = xive_native_has_single_escalation(); | ||||
| 	xive->ops = &kvmppc_xive_native_ops; | ||||
| 
 | ||||
| 	if (ret) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	kvm->arch.xive = xive; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| @ -1204,8 +1206,8 @@ static int xive_native_debug_show(struct seq_file *m, void *private) | ||||
| 		if (!xc) | ||||
| 			continue; | ||||
| 
 | ||||
| 		seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", | ||||
| 			   xc->server_num, | ||||
| 		seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", | ||||
| 			   xc->server_num, xc->vp_id, | ||||
| 			   vcpu->arch.xive_saved_state.nsr, | ||||
| 			   vcpu->arch.xive_saved_state.cppr, | ||||
| 			   vcpu->arch.xive_saved_state.ipb, | ||||
|  | ||||
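
kvmppc_xive_native_configure_queue() above wraps the low-level queue configuration so that the reference to the previous queue page is dropped only after the new configuration has succeeded. A stand-alone sketch of that ordering, with invented names (configure_queue, reconfigure_queue) and malloc/free standing in for page references, might look like this:

/* Hypothetical sketch: keep the old buffer alive until the new
 * configuration succeeds, then release it. */
#include <stdio.h>
#include <stdlib.h>

struct queue {
	void *page;
};

/* Stand-in for xive_native_configure_queue(): installs the new page on
 * success, fails when asked to reset an already-empty queue. */
static int configure_queue(struct queue *q, void *new_page)
{
	if (!new_page && !q->page)
		return -1;
	q->page = new_page;
	return 0;
}

static int reconfigure_queue(struct queue *q, void *new_page)
{
	void *prev = q->page;
	int rc = configure_queue(q, new_page);

	if (rc)
		return rc;        /* old page still referenced and usable */
	free(prev);               /* counterpart of put_page(qpage_prev) */
	return 0;
}

int main(void)
{
	struct queue q = { .page = malloc(4096) };

	reconfigure_queue(&q, malloc(4096));
	reconfigure_queue(&q, NULL);      /* reset: drop the queue page */
	printf("queue page now %p\n", q.page);
	return 0;
}

The point of the ordering shows on the error path: a failed reconfiguration returns before free(prev), so the previously configured queue stays intact — the same reason the kernel helper only calls put_page(virt_to_page(qpage_prev)) after xive_native_configure_queue() returns 0.
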
| @ -355,9 +355,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | ||||
| 
 | ||||
| 	if (tlbsel == 1) { | ||||
| 		struct vm_area_struct *vma; | ||||
| 		down_read(¤t->mm->mmap_sem); | ||||
| 		down_read(&kvm->mm->mmap_sem); | ||||
| 
 | ||||
| 		vma = find_vma(current->mm, hva); | ||||
| 		vma = find_vma(kvm->mm, hva); | ||||
| 		if (vma && hva >= vma->vm_start && | ||||
| 		    (vma->vm_flags & VM_PFNMAP)) { | ||||
| 			/*
 | ||||
| @ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | ||||
| 			tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); | ||||
| 		} | ||||
| 
 | ||||
| 		up_read(¤t->mm->mmap_sem); | ||||
| 		up_read(&kvm->mm->mmap_sem); | ||||
| 	} | ||||
| 
 | ||||
| 	if (likely(!pfnmap)) { | ||||
|  | ||||
| @ -31,6 +31,8 @@ | ||||
| #include <asm/hvcall.h> | ||||
| #include <asm/plpar_wrappers.h> | ||||
| #endif | ||||
| #include <asm/ultravisor.h> | ||||
| #include <asm/kvm_host.h> | ||||
| 
 | ||||
| #include "timing.h" | ||||
| #include "irq.h" | ||||
| @ -522,6 +524,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | ||||
| 	case KVM_CAP_IMMEDIATE_EXIT: | ||||
| 		r = 1; | ||||
| 		break; | ||||
| 	case KVM_CAP_PPC_GUEST_DEBUG_SSTEP: | ||||
| 		/* fall through */ | ||||
| 	case KVM_CAP_PPC_PAIRED_SINGLES: | ||||
| 	case KVM_CAP_PPC_OSI: | ||||
| 	case KVM_CAP_PPC_GET_PVINFO: | ||||
| @ -2411,6 +2415,16 @@ long kvm_arch_vm_ioctl(struct file *filp, | ||||
| 			r = -EFAULT; | ||||
| 		break; | ||||
| 	} | ||||
| 	case KVM_PPC_SVM_OFF: { | ||||
| 		struct kvm *kvm = filp->private_data; | ||||
| 
 | ||||
| 		r = 0; | ||||
| 		if (!kvm->arch.kvm_ops->svm_off) | ||||
| 			goto out; | ||||
| 
 | ||||
| 		r = kvm->arch.kvm_ops->svm_off(kvm); | ||||
| 		break; | ||||
| 	} | ||||
| 	default: { | ||||
| 		struct kvm *kvm = filp->private_data; | ||||
| 		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); | ||||
|  | ||||
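
The KVM_PPC_SVM_OFF dispatch added above is a plain vm ioctl with no payload. A rough user-space sketch of how a VMM might invoke it on guest reset is shown below; it assumes vm_fd is an already-created KVM VM file descriptor and that the running kernel's <linux/kvm.h> defines KVM_PPC_SVM_OFF. Error handling beyond a diagnostic message is elided.

/* Hypothetical userspace sketch: turn off secure mode on guest reset. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int svm_off_on_reset(int vm_fd)
{
	/* No argument: the kernel side simply calls kvm_ops->svm_off(kvm). */
	int ret = ioctl(vm_fd, KVM_PPC_SVM_OFF, 0);

	if (ret < 0)
		fprintf(stderr, "KVM_PPC_SVM_OFF failed: %s\n", strerror(errno));
	return ret;
}
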
| @ -392,6 +392,7 @@ struct kvm_vcpu_stat { | ||||
| 	u64 diagnose_10; | ||||
| 	u64 diagnose_44; | ||||
| 	u64 diagnose_9c; | ||||
| 	u64 diagnose_9c_ignored; | ||||
| 	u64 diagnose_258; | ||||
| 	u64 diagnose_308; | ||||
| 	u64 diagnose_500; | ||||
|  | ||||
| @ -158,14 +158,28 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) | ||||
| 
 | ||||
| 	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; | ||||
| 	vcpu->stat.diagnose_9c++; | ||||
| 	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid); | ||||
| 
 | ||||
| 	/* yield to self */ | ||||
| 	if (tid == vcpu->vcpu_id) | ||||
| 		return 0; | ||||
| 		goto no_yield; | ||||
| 
 | ||||
| 	/* yield to invalid */ | ||||
| 	tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); | ||||
| 	if (tcpu) | ||||
| 		kvm_vcpu_yield_to(tcpu); | ||||
| 	if (!tcpu) | ||||
| 		goto no_yield; | ||||
| 
 | ||||
| 	/* target already running */ | ||||
| 	if (READ_ONCE(tcpu->cpu) >= 0) | ||||
| 		goto no_yield; | ||||
| 
 | ||||
| 	if (kvm_vcpu_yield_to(tcpu) <= 0) | ||||
| 		goto no_yield; | ||||
| 
 | ||||
| 	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid); | ||||
| 	return 0; | ||||
| no_yield: | ||||
| 	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid); | ||||
| 	vcpu->stat.diagnose_9c_ignored++; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
|  | ||||
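
The reworked diag 0x9c handler above only yields when the target vCPU exists, is not the caller, and is not already running; every other request is counted as ignored. A minimal stand-alone sketch of that filtering, with an invented struct vcpu and counter, could look like this:

/* Hypothetical sketch of the directed-yield filtering added above. */
#include <stdbool.h>
#include <stdio.h>

struct vcpu {
	int id;
	int cpu;          /* >= 0 means currently running on a host CPU */
};

static unsigned long diag_9c_ignored;

static bool try_directed_yield(const struct vcpu *self, const struct vcpu *target)
{
	if (!target || target->id == self->id || target->cpu >= 0) {
		diag_9c_ignored++;   /* mirrors vcpu->stat.diagnose_9c_ignored */
		return false;
	}
	/* here the real code calls kvm_vcpu_yield_to(target) and also
	 * counts a failed yield as ignored */
	return true;
}

int main(void)
{
	struct vcpu a = { .id = 0, .cpu = 2 };
	struct vcpu b = { .id = 1, .cpu = -1 };

	printf("yield to self: %d\n", try_directed_yield(&a, &a));
	printf("yield to idle target: %d\n", try_directed_yield(&a, &b));
	printf("ignored so far: %lu\n", diag_9c_ignored);
	return 0;
}
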
| @ -1477,8 +1477,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int __inject_sigp_restart(struct kvm_vcpu *vcpu, | ||||
| 				 struct kvm_s390_irq *irq) | ||||
| static int __inject_sigp_restart(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | ||||
| 
 | ||||
| @ -2007,7 +2006,7 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | ||||
| 		rc = __inject_sigp_stop(vcpu, irq); | ||||
| 		break; | ||||
| 	case KVM_S390_RESTART: | ||||
| 		rc = __inject_sigp_restart(vcpu, irq); | ||||
| 		rc = __inject_sigp_restart(vcpu); | ||||
| 		break; | ||||
| 	case KVM_S390_INT_CLOCK_COMP: | ||||
| 		rc = __inject_ckc(vcpu); | ||||
|  | ||||
| @ -155,6 +155,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | ||||
| 	{ "instruction_diag_10", VCPU_STAT(diagnose_10) }, | ||||
| 	{ "instruction_diag_44", VCPU_STAT(diagnose_44) }, | ||||
| 	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, | ||||
| 	{ "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) }, | ||||
| 	{ "instruction_diag_258", VCPU_STAT(diagnose_258) }, | ||||
| 	{ "instruction_diag_308", VCPU_STAT(diagnose_308) }, | ||||
| 	{ "instruction_diag_500", VCPU_STAT(diagnose_500) }, | ||||
| @ -453,16 +454,14 @@ static void kvm_s390_cpu_feat_init(void) | ||||
| 
 | ||||
| int kvm_arch_init(void *opaque) | ||||
| { | ||||
| 	int rc; | ||||
| 	int rc = -ENOMEM; | ||||
| 
 | ||||
| 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); | ||||
| 	if (!kvm_s390_dbf) | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { | ||||
| 		rc = -ENOMEM; | ||||
| 		goto out_debug_unreg; | ||||
| 	} | ||||
| 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	kvm_s390_cpu_feat_init(); | ||||
| 
 | ||||
| @ -470,19 +469,17 @@ int kvm_arch_init(void *opaque) | ||||
| 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); | ||||
| 	if (rc) { | ||||
| 		pr_err("A FLIC registration call failed with rc=%d\n", rc); | ||||
| 		goto out_debug_unreg; | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	rc = kvm_s390_gib_init(GAL_ISC); | ||||
| 	if (rc) | ||||
| 		goto out_gib_destroy; | ||||
| 		goto out; | ||||
| 
 | ||||
| 	return 0; | ||||
| 
 | ||||
| out_gib_destroy: | ||||
| 	kvm_s390_gib_destroy(); | ||||
| out_debug_unreg: | ||||
| 	debug_unregister(kvm_s390_dbf); | ||||
| out: | ||||
| 	kvm_arch_exit(); | ||||
| 	return rc; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -3323,8 +3323,19 @@ static int intel_pmu_hw_config(struct perf_event *event) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_RETPOLINE | ||||
| static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr); | ||||
| static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr); | ||||
| #endif | ||||
| 
 | ||||
| struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) | ||||
| { | ||||
| #ifdef CONFIG_RETPOLINE | ||||
| 	if (x86_pmu.guest_get_msrs == intel_guest_get_msrs) | ||||
| 		return intel_guest_get_msrs(nr); | ||||
| 	else if (x86_pmu.guest_get_msrs == core_guest_get_msrs) | ||||
| 		return core_guest_get_msrs(nr); | ||||
| #endif | ||||
| 	if (x86_pmu.guest_get_msrs) | ||||
| 		return x86_pmu.guest_get_msrs(nr); | ||||
| 	*nr = 0; | ||||
|  | ||||
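
The CONFIG_RETPOLINE block added to perf_guest_get_msrs() above avoids an indirect call by comparing the function pointer against the only two possible implementations and calling the matching one directly. The general pattern, sketched stand-alone with invented names (backend_a, backend_b, and AVOID_INDIRECT_CALLS as a stand-in for CONFIG_RETPOLINE):

/* Hypothetical sketch of devirtualizing a hot indirect call. */
#include <stdio.h>

static int backend_a(int x) { return x + 1; }
static int backend_b(int x) { return x * 2; }

struct ops {
	int (*handler)(int x);
};

static struct ops ops = { .handler = backend_a };

static int dispatch(int x)
{
#ifdef AVOID_INDIRECT_CALLS            /* stand-in for CONFIG_RETPOLINE */
	if (ops.handler == backend_a)
		return backend_a(x);     /* direct call, no retpoline thunk */
	else if (ops.handler == backend_b)
		return backend_b(x);
#endif
	return ops.handler(x);           /* fall back to the indirect call */
}

int main(void)
{
	printf("%d\n", dispatch(41));
	return 0;
}
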
| @ -156,10 +156,8 @@ enum kvm_reg { | ||||
| 	VCPU_REGS_R15 = __VCPU_REGS_R15, | ||||
| #endif | ||||
| 	VCPU_REGS_RIP, | ||||
| 	NR_VCPU_REGS | ||||
| }; | ||||
| 	NR_VCPU_REGS, | ||||
| 
 | ||||
| enum kvm_reg_ex { | ||||
| 	VCPU_EXREG_PDPTR = NR_VCPU_REGS, | ||||
| 	VCPU_EXREG_CR3, | ||||
| 	VCPU_EXREG_RFLAGS, | ||||
| @ -454,6 +452,11 @@ struct kvm_pmc { | ||||
| 	u64 eventsel; | ||||
| 	struct perf_event *perf_event; | ||||
| 	struct kvm_vcpu *vcpu; | ||||
| 	/*
 | ||||
| 	 * eventsel value for general purpose counters, | ||||
| 	 * ctrl value for fixed counters. | ||||
| 	 */ | ||||
| 	u64 current_config; | ||||
| }; | ||||
| 
 | ||||
| struct kvm_pmu { | ||||
| @ -472,7 +475,21 @@ struct kvm_pmu { | ||||
| 	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; | ||||
| 	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; | ||||
| 	struct irq_work irq_work; | ||||
| 	u64 reprogram_pmi; | ||||
| 	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); | ||||
| 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); | ||||
| 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The gate to release perf_events not marked in | ||||
| 	 * pmc_in_use only once in a vcpu time slice. | ||||
| 	 */ | ||||
| 	bool need_cleanup; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The total number of programmed perf_events; it helps to avoid a | ||||
| 	 * redundant check before cleanup if the guest doesn't use the vPMU at all. | ||||
| 	 */ | ||||
| 	u8 event_count; | ||||
| }; | ||||
| 
 | ||||
| struct kvm_pmu_ops; | ||||
| @ -565,6 +582,7 @@ struct kvm_vcpu_arch { | ||||
| 	u64 smbase; | ||||
| 	u64 smi_count; | ||||
| 	bool tpr_access_reporting; | ||||
| 	bool xsaves_enabled; | ||||
| 	u64 ia32_xss; | ||||
| 	u64 microcode_version; | ||||
| 	u64 arch_capabilities; | ||||
| @ -1041,7 +1059,6 @@ struct kvm_x86_ops { | ||||
| 			    struct kvm_segment *var, int seg); | ||||
| 	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | ||||
| 	void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); | ||||
| 	void (*decache_cr3)(struct kvm_vcpu *vcpu); | ||||
| 	void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); | ||||
| 	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); | ||||
| 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | ||||
| @ -1090,7 +1107,7 @@ struct kvm_x86_ops { | ||||
| 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | ||||
| 	void (*enable_irq_window)(struct kvm_vcpu *vcpu); | ||||
| 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | ||||
| 	bool (*get_enable_apicv)(struct kvm_vcpu *vcpu); | ||||
| 	bool (*get_enable_apicv)(struct kvm *kvm); | ||||
| 	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); | ||||
| 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); | ||||
| 	void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); | ||||
| @ -1578,6 +1595,8 @@ bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); | ||||
| 
 | ||||
| void kvm_make_mclock_inprogress_request(struct kvm *kvm); | ||||
| void kvm_make_scan_ioapic_request(struct kvm *kvm); | ||||
| void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, | ||||
| 				       unsigned long *vcpu_bitmap); | ||||
| 
 | ||||
| void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | ||||
| 				     struct kvm_async_pf *work); | ||||
|  | ||||
| @ -33,6 +33,7 @@ | ||||
| #include <asm/apicdef.h> | ||||
| #include <asm/hypervisor.h> | ||||
| #include <asm/tlb.h> | ||||
| #include <asm/cpuidle_haltpoll.h> | ||||
| 
 | ||||
| static int kvmapf = 1; | ||||
| 
 | ||||
|  | ||||
| @ -8,9 +8,9 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ | ||||
| 				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o | ||||
| kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o | ||||
| 
 | ||||
| kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
 | ||||
| kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
 | ||||
| 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
 | ||||
| 			   hyperv.o page_track.o debugfs.o | ||||
| 			   hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o | ||||
| 
 | ||||
| kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o | ||||
| kvm-amd-y		+= svm.o pmu_amd.o | ||||
|  | ||||
| @ -505,7 +505,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, | ||||
| 
 | ||||
| 	r = -E2BIG; | ||||
| 
 | ||||
| 	if (*nent >= maxnent) | ||||
| 	if (WARN_ON(*nent >= maxnent)) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	do_host_cpuid(entry, function, 0); | ||||
| @ -780,6 +780,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, | ||||
| 	case 0x8000001a: | ||||
| 	case 0x8000001e: | ||||
| 		break; | ||||
| 	/* Support memory encryption cpuid if host supports it */ | ||||
| 	case 0x8000001F: | ||||
| 		if (!boot_cpu_has(X86_FEATURE_SEV)) | ||||
| 			entry->eax = entry->ebx = entry->ecx = entry->edx = 0; | ||||
| 		break; | ||||
| 	/*Add support for Centaur's CPUID instruction*/ | ||||
| 	case 0xC0000000: | ||||
| 		/*Just support up to 0xC0000004 now*/ | ||||
| @ -812,6 +817,9 @@ out: | ||||
| static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func, | ||||
| 			 int *nent, int maxnent, unsigned int type) | ||||
| { | ||||
| 	if (*nent >= maxnent) | ||||
| 		return -E2BIG; | ||||
| 
 | ||||
| 	if (type == KVM_GET_EMULATED_CPUID) | ||||
| 		return __do_cpuid_func_emulated(entry, func, nent, maxnent); | ||||
| 
 | ||||
|  | ||||
| @ -2770,11 +2770,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) | ||||
| 		return emulate_ud(ctxt); | ||||
| 
 | ||||
| 	ops->get_msr(ctxt, MSR_EFER, &efer); | ||||
| 	setup_syscalls_segments(ctxt, &cs, &ss); | ||||
| 
 | ||||
| 	if (!(efer & EFER_SCE)) | ||||
| 		return emulate_ud(ctxt); | ||||
| 
 | ||||
| 	setup_syscalls_segments(ctxt, &cs, &ss); | ||||
| 	ops->get_msr(ctxt, MSR_STAR, &msr_data); | ||||
| 	msr_data >>= 32; | ||||
| 	cs_sel = (u16)(msr_data & 0xfffc); | ||||
| @ -2838,12 +2837,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) | ||||
| 	if (ctxt->mode == X86EMUL_MODE_PROT64) | ||||
| 		return X86EMUL_UNHANDLEABLE; | ||||
| 
 | ||||
| 	setup_syscalls_segments(ctxt, &cs, &ss); | ||||
| 
 | ||||
| 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); | ||||
| 	if ((msr_data & 0xfffc) == 0x0) | ||||
| 		return emulate_gp(ctxt, 0); | ||||
| 
 | ||||
| 	setup_syscalls_segments(ctxt, &cs, &ss); | ||||
| 	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); | ||||
| 	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK; | ||||
| 	ss_sel = cs_sel + 8; | ||||
|  | ||||
| @ -271,8 +271,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | ||||
| { | ||||
| 	unsigned index; | ||||
| 	bool mask_before, mask_after; | ||||
| 	int old_remote_irr, old_delivery_status; | ||||
| 	union kvm_ioapic_redirect_entry *e; | ||||
| 	unsigned long vcpu_bitmap; | ||||
| 	int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode; | ||||
| 
 | ||||
| 	switch (ioapic->ioregsel) { | ||||
| 	case IOAPIC_REG_VERSION: | ||||
| @ -296,6 +297,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | ||||
| 		/* Preserve read-only fields */ | ||||
| 		old_remote_irr = e->fields.remote_irr; | ||||
| 		old_delivery_status = e->fields.delivery_status; | ||||
| 		old_dest_id = e->fields.dest_id; | ||||
| 		old_dest_mode = e->fields.dest_mode; | ||||
| 		if (ioapic->ioregsel & 1) { | ||||
| 			e->bits &= 0xffffffff; | ||||
| 			e->bits |= (u64) val << 32; | ||||
| @ -321,7 +324,34 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | ||||
| 		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG | ||||
| 		    && ioapic->irr & (1 << index)) | ||||
| 			ioapic_service(ioapic, index, false); | ||||
| 		kvm_make_scan_ioapic_request(ioapic->kvm); | ||||
| 		if (e->fields.delivery_mode == APIC_DM_FIXED) { | ||||
| 			struct kvm_lapic_irq irq; | ||||
| 
 | ||||
| 			irq.shorthand = 0; | ||||
| 			irq.vector = e->fields.vector; | ||||
| 			irq.delivery_mode = e->fields.delivery_mode << 8; | ||||
| 			irq.dest_id = e->fields.dest_id; | ||||
| 			irq.dest_mode = e->fields.dest_mode; | ||||
| 			bitmap_zero(&vcpu_bitmap, 16); | ||||
| 			kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, | ||||
| 						 &vcpu_bitmap); | ||||
| 			if (old_dest_mode != e->fields.dest_mode || | ||||
| 			    old_dest_id != e->fields.dest_id) { | ||||
| 				/*
 | ||||
| 				 * Update vcpu_bitmap with vcpus specified in | ||||
| 				 * the previous request as well. This is done to | ||||
| 				 * keep ioapic_handled_vectors synchronized. | ||||
| 				 */ | ||||
| 				irq.dest_id = old_dest_id; | ||||
| 				irq.dest_mode = old_dest_mode; | ||||
| 				kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, | ||||
| 							 &vcpu_bitmap); | ||||
| 			} | ||||
| 			kvm_make_scan_ioapic_request_mask(ioapic->kvm, | ||||
| 							  &vcpu_bitmap); | ||||
| 		} else { | ||||
| 			kvm_make_scan_ioapic_request(ioapic->kvm); | ||||
| 		} | ||||
| 		break; | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @ -37,22 +37,50 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14) | ||||
| BUILD_KVM_GPR_ACCESSORS(r15, R15) | ||||
| #endif | ||||
| 
 | ||||
| static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | ||||
| 					      enum kvm_reg reg) | ||||
| static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, | ||||
| 					     enum kvm_reg reg) | ||||
| { | ||||
| 	if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail)) | ||||
| 	return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); | ||||
| } | ||||
| 
 | ||||
| static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, | ||||
| 					 enum kvm_reg reg) | ||||
| { | ||||
| 	return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); | ||||
| } | ||||
| 
 | ||||
| static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, | ||||
| 					       enum kvm_reg reg) | ||||
| { | ||||
| 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); | ||||
| } | ||||
| 
 | ||||
| static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, | ||||
| 					   enum kvm_reg reg) | ||||
| { | ||||
| 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); | ||||
| 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) | ||||
| { | ||||
| 	if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (!kvm_register_is_available(vcpu, reg)) | ||||
| 		kvm_x86_ops->cache_reg(vcpu, reg); | ||||
| 
 | ||||
| 	return vcpu->arch.regs[reg]; | ||||
| } | ||||
| 
 | ||||
| static inline void kvm_register_write(struct kvm_vcpu *vcpu, | ||||
| 				      enum kvm_reg reg, | ||||
| static inline void kvm_register_write(struct kvm_vcpu *vcpu, int reg, | ||||
| 				      unsigned long val) | ||||
| { | ||||
| 	if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) | ||||
| 		return; | ||||
| 
 | ||||
| 	vcpu->arch.regs[reg] = val; | ||||
| 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); | ||||
| 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); | ||||
| 	kvm_register_mark_dirty(vcpu, reg); | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) | ||||
| @ -79,9 +107,8 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | ||||
| { | ||||
| 	might_sleep();  /* on svm */ | ||||
| 
 | ||||
| 	if (!test_bit(VCPU_EXREG_PDPTR, | ||||
| 		      (unsigned long *)&vcpu->arch.regs_avail)) | ||||
| 		kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR); | ||||
| 	if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) | ||||
| 		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); | ||||
| 
 | ||||
| 	return vcpu->arch.walk_mmu->pdptrs[index]; | ||||
| } | ||||
| @ -109,8 +136,8 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) | ||||
| 
 | ||||
| static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) | ||||
| 		kvm_x86_ops->decache_cr3(vcpu); | ||||
| 	if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) | ||||
| 		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_CR3); | ||||
| 	return vcpu->arch.cr3; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -557,60 +557,53 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | ||||
| 			irq->level, irq->trig_mode, dest_map); | ||||
| } | ||||
| 
 | ||||
| static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, | ||||
| 			 struct kvm_lapic_irq *irq, u32 min) | ||||
| { | ||||
| 	int i, count = 0; | ||||
| 	struct kvm_vcpu *vcpu; | ||||
| 
 | ||||
| 	if (min > map->max_apic_id) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	for_each_set_bit(i, ipi_bitmap, | ||||
| 		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { | ||||
| 		if (map->phys_map[min + i]) { | ||||
| 			vcpu = map->phys_map[min + i]->vcpu; | ||||
| 			count += kvm_apic_set_irq(vcpu, irq, NULL); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return count; | ||||
| } | ||||
| 
 | ||||
| int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, | ||||
| 		    unsigned long ipi_bitmap_high, u32 min, | ||||
| 		    unsigned long icr, int op_64_bit) | ||||
| { | ||||
| 	int i; | ||||
| 	struct kvm_apic_map *map; | ||||
| 	struct kvm_vcpu *vcpu; | ||||
| 	struct kvm_lapic_irq irq = {0}; | ||||
| 	int cluster_size = op_64_bit ? 64 : 32; | ||||
| 	int count = 0; | ||||
| 	int count; | ||||
| 
 | ||||
| 	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK)) | ||||
| 		return -KVM_EINVAL; | ||||
| 
 | ||||
| 	irq.vector = icr & APIC_VECTOR_MASK; | ||||
| 	irq.delivery_mode = icr & APIC_MODE_MASK; | ||||
| 	irq.level = (icr & APIC_INT_ASSERT) != 0; | ||||
| 	irq.trig_mode = icr & APIC_INT_LEVELTRIG; | ||||
| 
 | ||||
| 	if (icr & APIC_DEST_MASK) | ||||
| 		return -KVM_EINVAL; | ||||
| 	if (icr & APIC_SHORT_MASK) | ||||
| 		return -KVM_EINVAL; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	map = rcu_dereference(kvm->arch.apic_map); | ||||
| 
 | ||||
| 	if (unlikely(!map)) { | ||||
| 		count = -EOPNOTSUPP; | ||||
| 		goto out; | ||||
| 	count = -EOPNOTSUPP; | ||||
| 	if (likely(map)) { | ||||
| 		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min); | ||||
| 		min += cluster_size; | ||||
| 		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min); | ||||
| 	} | ||||
| 
 | ||||
| 	if (min > map->max_apic_id) | ||||
| 		goto out; | ||||
| 	/* Bits above cluster_size are masked in the caller.  */ | ||||
| 	for_each_set_bit(i, &ipi_bitmap_low, | ||||
| 		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { | ||||
| 		if (map->phys_map[min + i]) { | ||||
| 			vcpu = map->phys_map[min + i]->vcpu; | ||||
| 			count += kvm_apic_set_irq(vcpu, &irq, NULL); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	min += cluster_size; | ||||
| 
 | ||||
| 	if (min > map->max_apic_id) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	for_each_set_bit(i, &ipi_bitmap_high, | ||||
| 		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { | ||||
| 		if (map->phys_map[min + i]) { | ||||
| 			vcpu = map->phys_map[min + i]->vcpu; | ||||
| 			count += kvm_apic_set_irq(vcpu, &irq, NULL); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
| 	rcu_read_unlock(); | ||||
| 	return count; | ||||
| } | ||||
| @ -1124,6 +1117,50 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * This routine identifies the mask of destination vcpus meant to receive an | ||||
|  * IOAPIC interrupt. It either uses kvm_apic_map_get_dest_lapic() to find | ||||
|  * the destination vcpu array and set the bitmap, or it checks every | ||||
|  * available vcpu against the interrupt's destination. | ||||
|  */ | ||||
| void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||||
| 			      unsigned long *vcpu_bitmap) | ||||
| { | ||||
| 	struct kvm_lapic **dest_vcpu = NULL; | ||||
| 	struct kvm_lapic *src = NULL; | ||||
| 	struct kvm_apic_map *map; | ||||
| 	struct kvm_vcpu *vcpu; | ||||
| 	unsigned long bitmap; | ||||
| 	int i, vcpu_idx; | ||||
| 	bool ret; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	map = rcu_dereference(kvm->arch.apic_map); | ||||
| 
 | ||||
| 	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu, | ||||
| 					  &bitmap); | ||||
| 	if (ret) { | ||||
| 		for_each_set_bit(i, &bitmap, 16) { | ||||
| 			if (!dest_vcpu[i]) | ||||
| 				continue; | ||||
| 			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx; | ||||
| 			__set_bit(vcpu_idx, vcpu_bitmap); | ||||
| 		} | ||||
| 	} else { | ||||
| 		kvm_for_each_vcpu(i, vcpu, kvm) { | ||||
| 			if (!kvm_apic_present(vcpu)) | ||||
| 				continue; | ||||
| 			if (!kvm_apic_match_dest(vcpu, NULL, | ||||
| 						 irq->delivery_mode, | ||||
| 						 irq->dest_id, | ||||
| 						 irq->dest_mode)) | ||||
| 				continue; | ||||
| 			__set_bit(i, vcpu_bitmap); | ||||
| 		} | ||||
| 	} | ||||
| 	rcu_read_unlock(); | ||||
| } | ||||
| 
 | ||||
| int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | ||||
| { | ||||
| 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | ||||
| @ -2709,7 +2746,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) | ||||
| 	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs | ||||
| 	 * and leave the INIT pending. | ||||
| 	 */ | ||||
| 	if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) { | ||||
| 	if (kvm_vcpu_latch_init(vcpu)) { | ||||
| 		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); | ||||
| 		if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) | ||||
| 			clear_bit(KVM_APIC_SIPI, &apic->pending_events); | ||||
|  | ||||
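
The __pv_send_ipi() helper factored out above walks the set bits of a guest-supplied bitmap, offsets them by min, bounds the walk by the highest APIC id in the map, and returns the number of interrupts actually delivered. A stand-alone sketch of that loop, with an invented struct apic_map and deliver_ipi() standing in for kvm_apic_set_irq():

/* Hypothetical sketch of the bitmap-to-APIC-id delivery loop above. */
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG_SKETCH 64

struct apic_map {
	uint32_t max_apic_id;
	int present[256];              /* 1 if a vCPU backs this APIC id */
};

static int deliver_ipi(uint32_t apic_id)
{
	printf("IPI to APIC id %u\n", apic_id);
	return 1;                      /* one interrupt delivered */
}

static int pv_send_ipi(unsigned long ipi_bitmap, const struct apic_map *map,
		       uint32_t min)
{
	int count = 0;
	uint32_t limit;

	if (min > map->max_apic_id)
		return 0;

	limit = map->max_apic_id - min + 1;
	if (limit > BITS_PER_LONG_SKETCH)
		limit = BITS_PER_LONG_SKETCH;

	for (uint32_t i = 0; i < limit; i++)
		if ((ipi_bitmap & (1UL << i)) && map->present[min + i])
			count += deliver_ipi(min + i);

	return count;
}

int main(void)
{
	struct apic_map map = { .max_apic_id = 7 };

	for (int i = 0; i <= 7; i++)
		map.present[i] = 1;
	printf("delivered %d\n", pv_send_ipi(0xb /* bits 0,1,3 */, &map, 0));
	return 0;
}

Calling pv_send_ipi(0xb, &map, 0) delivers to APIC ids 0, 1 and 3 and returns 3; ids beyond map.max_apic_id are never touched, mirroring the bound in the kernel helper.
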
| @ -226,6 +226,9 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | ||||
| 
 | ||||
| void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); | ||||
| 
 | ||||
| void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||||
| 			      unsigned long *vcpu_bitmap); | ||||
| 
 | ||||
| bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||||
| 			struct kvm_vcpu **dest_vcpu); | ||||
| int kvm_vector_to_index(u32 vector, u32 dest_vcpus, | ||||
|  | ||||
| @ -4395,7 +4395,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, | ||||
| 			kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); | ||||
| 			if (!skip_tlb_flush) { | ||||
| 				kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); | ||||
| 				kvm_x86_ops->tlb_flush(vcpu, true); | ||||
| 				kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||||
| 			} | ||||
| 
 | ||||
| 			/*
 | ||||
| @ -62,8 +62,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event, | ||||
| 	struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||||
| 	struct kvm_pmu *pmu = pmc_to_pmu(pmc); | ||||
| 
 | ||||
| 	if (!test_and_set_bit(pmc->idx, | ||||
| 			      (unsigned long *)&pmu->reprogram_pmi)) { | ||||
| 	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) { | ||||
| 		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status); | ||||
| 		kvm_make_request(KVM_REQ_PMU, pmc->vcpu); | ||||
| 	} | ||||
| @ -76,8 +75,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event, | ||||
| 	struct kvm_pmc *pmc = perf_event->overflow_handler_context; | ||||
| 	struct kvm_pmu *pmu = pmc_to_pmu(pmc); | ||||
| 
 | ||||
| 	if (!test_and_set_bit(pmc->idx, | ||||
| 			      (unsigned long *)&pmu->reprogram_pmi)) { | ||||
| 	if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) { | ||||
| 		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status); | ||||
| 		kvm_make_request(KVM_REQ_PMU, pmc->vcpu); | ||||
| 
 | ||||
| @ -137,7 +135,37 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, | ||||
| 	} | ||||
| 
 | ||||
| 	pmc->perf_event = event; | ||||
| 	clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi); | ||||
| 	pmc_to_pmu(pmc)->event_count++; | ||||
| 	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); | ||||
| } | ||||
| 
 | ||||
| static void pmc_pause_counter(struct kvm_pmc *pmc) | ||||
| { | ||||
| 	u64 counter = pmc->counter; | ||||
| 
 | ||||
| 	if (!pmc->perf_event) | ||||
| 		return; | ||||
| 
 | ||||
| 	/* update counter, reset event value to avoid redundant accumulation */ | ||||
| 	counter += perf_event_pause(pmc->perf_event, true); | ||||
| 	pmc->counter = counter & pmc_bitmask(pmc); | ||||
| } | ||||
| 
 | ||||
| static bool pmc_resume_counter(struct kvm_pmc *pmc) | ||||
| { | ||||
| 	if (!pmc->perf_event) | ||||
| 		return false; | ||||
| 
 | ||||
| 	/* recalibrate sample period and check if it's accepted by perf core */ | ||||
| 	if (perf_event_period(pmc->perf_event, | ||||
| 			(-pmc->counter) & pmc_bitmask(pmc))) | ||||
| 		return false; | ||||
| 
 | ||||
| 	/* reuse perf_event to serve as pmc_reprogram_counter() does */ | ||||
| 	perf_event_enable(pmc->perf_event); | ||||
| 
 | ||||
| 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
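The pause/resume pair above exists so that reprogramming a counter with an unchanged configuration can reuse the existing perf_event instead of destroying and recreating it. A toy userspace model of that decision, using a made-up struct event rather than the real perf API:

#include <stdio.h>
#include <stdlib.h>

struct event { unsigned long long config; int running; };
struct counter { unsigned long long config; struct event *event; };

static struct event *event_create(unsigned long long config)
{
	struct event *e = malloc(sizeof(*e));
	e->config = config;
	e->running = 1;
	printf("created backing event for config %#llx\n", config);
	return e;
}

static void reprogram(struct counter *c, unsigned long long new_config)
{
	if (c->event)
		c->event->running = 0;		/* "pause": stop it and fold the count back in */

	if (c->event && c->config == new_config) {
		c->event->running = 1;		/* "resume": nothing changed, reuse it */
		return;
	}

	free(c->event);				/* config changed: release and recreate */
	c->config = new_config;
	c->event = event_create(new_config);
}

int main(void)
{
	struct counter c = { 0, NULL };

	reprogram(&c, 0x41);	/* creates a backing event */
	reprogram(&c, 0x41);	/* same config: reused, no new event */
	reprogram(&c, 0x51);	/* new config: old event dropped, new one created */
	free(c.event);
	return 0;
}
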
| void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | ||||
| @ -154,7 +182,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | ||||
| 
 | ||||
| 	pmc->eventsel = eventsel; | ||||
| 
 | ||||
| 	pmc_stop_counter(pmc); | ||||
| 	pmc_pause_counter(pmc); | ||||
| 
 | ||||
| 	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) | ||||
| 		return; | ||||
| @ -193,6 +221,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | ||||
| 	if (type == PERF_TYPE_RAW) | ||||
| 		config = eventsel & X86_RAW_EVENT_MASK; | ||||
| 
 | ||||
| 	if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) | ||||
| 		return; | ||||
| 
 | ||||
| 	pmc_release_perf_event(pmc); | ||||
| 
 | ||||
| 	pmc->current_config = eventsel; | ||||
| 	pmc_reprogram_counter(pmc, type, config, | ||||
| 			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR), | ||||
| 			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS), | ||||
| @ -209,7 +243,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) | ||||
| 	struct kvm_pmu_event_filter *filter; | ||||
| 	struct kvm *kvm = pmc->vcpu->kvm; | ||||
| 
 | ||||
| 	pmc_stop_counter(pmc); | ||||
| 	pmc_pause_counter(pmc); | ||||
| 
 | ||||
| 	if (!en_field || !pmc_is_enabled(pmc)) | ||||
| 		return; | ||||
| @ -224,6 +258,12 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) | ||||
| 			return; | ||||
| 	} | ||||
| 
 | ||||
| 	if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc)) | ||||
| 		return; | ||||
| 
 | ||||
| 	pmc_release_perf_event(pmc); | ||||
| 
 | ||||
| 	pmc->current_config = (u64)ctrl; | ||||
| 	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE, | ||||
| 			      kvm_x86_ops->pmu_ops->find_fixed_event(idx), | ||||
| 			      !(en_field & 0x2), /* exclude user */ | ||||
| @ -253,27 +293,32 @@ EXPORT_SYMBOL_GPL(reprogram_counter); | ||||
| void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 	u64 bitmask; | ||||
| 	int bit; | ||||
| 
 | ||||
| 	bitmask = pmu->reprogram_pmi; | ||||
| 
 | ||||
| 	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) { | ||||
| 	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) { | ||||
| 		struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit); | ||||
| 
 | ||||
| 		if (unlikely(!pmc || !pmc->perf_event)) { | ||||
| 			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); | ||||
| 			clear_bit(bit, pmu->reprogram_pmi); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		reprogram_counter(pmu, bit); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Unused perf_events are only released if the corresponding MSRs | ||||
| 	 * weren't accessed during the last vCPU time slice. kvm_arch_sched_in | ||||
| 	 * triggers KVM_REQ_PMU if cleanup is needed. | ||||
| 	 */ | ||||
| 	if (unlikely(pmu->need_cleanup)) | ||||
| 		kvm_pmu_cleanup(vcpu); | ||||
| } | ||||
| 
 | ||||
| /* check if idx is a valid index to access PMU */ | ||||
| int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | ||||
| int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) | ||||
| { | ||||
| 	return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx); | ||||
| 	return kvm_x86_ops->pmu_ops->is_valid_rdpmc_ecx(vcpu, idx); | ||||
| } | ||||
| 
 | ||||
| bool is_vmware_backdoor_pmc(u32 pmc_idx) | ||||
| @ -323,7 +368,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) | ||||
| 	if (is_vmware_backdoor_pmc(idx)) | ||||
| 		return kvm_pmu_rdpmc_vmware(vcpu, idx, data); | ||||
| 
 | ||||
| 	pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask); | ||||
| 	pmc = kvm_x86_ops->pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask); | ||||
| 	if (!pmc) | ||||
| 		return 1; | ||||
| 
 | ||||
| @ -339,7 +384,17 @@ void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) | ||||
| 
 | ||||
| bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) | ||||
| { | ||||
| 	return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr); | ||||
| 	return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) || | ||||
| 		kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr); | ||||
| } | ||||
| 
 | ||||
| static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 	struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr); | ||||
| 
 | ||||
| 	if (pmc) | ||||
| 		__set_bit(pmc->idx, pmu->pmc_in_use); | ||||
| } | ||||
| 
 | ||||
| int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | ||||
| @ -349,6 +404,7 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | ||||
| 
 | ||||
| int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||||
| { | ||||
| 	kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); | ||||
| 	return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info); | ||||
| } | ||||
| 
 | ||||
| @ -376,9 +432,45 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) | ||||
| 	memset(pmu, 0, sizeof(*pmu)); | ||||
| 	kvm_x86_ops->pmu_ops->init(vcpu); | ||||
| 	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); | ||||
| 	pmu->event_count = 0; | ||||
| 	pmu->need_cleanup = false; | ||||
| 	kvm_pmu_refresh(vcpu); | ||||
| } | ||||
| 
 | ||||
| static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = pmc_to_pmu(pmc); | ||||
| 
 | ||||
| 	if (pmc_is_fixed(pmc)) | ||||
| 		return fixed_ctrl_field(pmu->fixed_ctr_ctrl, | ||||
| 			pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3; | ||||
| 
 | ||||
| 	return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE; | ||||
| } | ||||
| 
 | ||||
| /* Release perf_events for vPMCs that have been unused for a full time slice.  */ | ||||
| void kvm_pmu_cleanup(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 	struct kvm_pmc *pmc = NULL; | ||||
| 	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX); | ||||
| 	int i; | ||||
| 
 | ||||
| 	pmu->need_cleanup = false; | ||||
| 
 | ||||
| 	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx, | ||||
| 		      pmu->pmc_in_use, X86_PMC_IDX_MAX); | ||||
| 
 | ||||
| 	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) { | ||||
| 		pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, i); | ||||
| 
 | ||||
| 		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc)) | ||||
| 			pmc_stop_counter(pmc); | ||||
| 	} | ||||
| 
 | ||||
| 	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX); | ||||
| } | ||||
| 
 | ||||
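The cleanup pass above releases backing events only for counters whose MSRs were not touched during the last vCPU time slice, which is what the pmc_in_use and all_valid_pmc_idx bitmaps track. A standalone illustration of the same bitmap arithmetic, with the four-counter layout invented for the example:

#include <stdio.h>

int main(void)
{
	unsigned long all_valid = 0x0fUL;	/* counters 0-3 exist */
	unsigned long in_use    = 0x05UL;	/* only 0 and 2 were accessed */
	unsigned long unused    = all_valid & ~in_use;

	for (int i = 0; i < 4; i++)
		if (unused & (1UL << i))
			printf("release backing event for counter %d\n", i);

	in_use = 0;	/* start the next time slice with a clean usage record */
	printf("in-use bitmap reset to %#lx\n", in_use);
	return 0;
}
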
| void kvm_pmu_destroy(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	kvm_pmu_reset(vcpu); | ||||
|  | ||||
| @ -25,9 +25,10 @@ struct kvm_pmu_ops { | ||||
| 	unsigned (*find_fixed_event)(int idx); | ||||
| 	bool (*pmc_is_enabled)(struct kvm_pmc *pmc); | ||||
| 	struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); | ||||
| 	struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx, | ||||
| 					  u64 *mask); | ||||
| 	int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); | ||||
| 	struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu, | ||||
| 		unsigned int idx, u64 *mask); | ||||
| 	struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr); | ||||
| 	int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx); | ||||
| 	bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); | ||||
| 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||||
| 	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); | ||||
| @ -55,12 +56,21 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) | ||||
| 	return counter & pmc_bitmask(pmc); | ||||
| } | ||||
| 
 | ||||
| static inline void pmc_release_perf_event(struct kvm_pmc *pmc) | ||||
| { | ||||
| 	if (pmc->perf_event) { | ||||
| 		perf_event_release_kernel(pmc->perf_event); | ||||
| 		pmc->perf_event = NULL; | ||||
| 		pmc->current_config = 0; | ||||
| 		pmc_to_pmu(pmc)->event_count--; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static inline void pmc_stop_counter(struct kvm_pmc *pmc) | ||||
| { | ||||
| 	if (pmc->perf_event) { | ||||
| 		pmc->counter = pmc_read_counter(pmc); | ||||
| 		perf_event_release_kernel(pmc->perf_event); | ||||
| 		pmc->perf_event = NULL; | ||||
| 		pmc_release_perf_event(pmc); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| @ -79,6 +89,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc) | ||||
| 	return kvm_x86_ops->pmu_ops->pmc_is_enabled(pmc); | ||||
| } | ||||
| 
 | ||||
| static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu, | ||||
| 						 u64 data) | ||||
| { | ||||
| 	return !(pmu->global_ctrl_mask & data); | ||||
| } | ||||
| 
 | ||||
| /* returns general purpose PMC with the specified MSR. Note that it can be
 | ||||
|  * used for both PERFCTRn and EVNTSELn; that is why it accepts base as a | ||||
|  * parameter to tell them apart. | ||||
| @ -110,13 +126,14 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx); | ||||
| void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); | ||||
| void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); | ||||
| int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); | ||||
| int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx); | ||||
| int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx); | ||||
| bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr); | ||||
| int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||||
| int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); | ||||
| void kvm_pmu_refresh(struct kvm_vcpu *vcpu); | ||||
| void kvm_pmu_reset(struct kvm_vcpu *vcpu); | ||||
| void kvm_pmu_init(struct kvm_vcpu *vcpu); | ||||
| void kvm_pmu_cleanup(struct kvm_vcpu *vcpu); | ||||
| void kvm_pmu_destroy(struct kvm_vcpu *vcpu); | ||||
| int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); | ||||
| 
 | ||||
|  | ||||
| @ -174,7 +174,7 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) | ||||
| } | ||||
| 
 | ||||
| /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ | ||||
| static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | ||||
| static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 
 | ||||
| @ -184,7 +184,8 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | ||||
| } | ||||
| 
 | ||||
| /* idx is the ECX register of RDPMC instruction */ | ||||
| static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask) | ||||
| static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, | ||||
| 	unsigned int idx, u64 *mask) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 	struct kvm_pmc *counters; | ||||
| @ -198,14 +199,20 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u | ||||
| } | ||||
| 
 | ||||
| static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) | ||||
| { | ||||
| 	/* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough.  */ | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 	int ret = false; | ||||
| 	struct kvm_pmc *pmc; | ||||
| 
 | ||||
| 	ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) || | ||||
| 		get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); | ||||
| 	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); | ||||
| 	pmc = pmc ? pmc : get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); | ||||
| 
 | ||||
| 	return ret; | ||||
| 	return pmc; | ||||
| } | ||||
| 
 | ||||
| static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | ||||
| @ -272,6 +279,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) | ||||
| 	pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | ||||
| 	pmu->nr_arch_fixed_counters = 0; | ||||
| 	pmu->global_status = 0; | ||||
| 	bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); | ||||
| } | ||||
| 
 | ||||
| static void amd_pmu_init(struct kvm_vcpu *vcpu) | ||||
| @ -285,6 +293,7 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) | ||||
| 		pmu->gp_counters[i].type = KVM_PMC_GP; | ||||
| 		pmu->gp_counters[i].vcpu = vcpu; | ||||
| 		pmu->gp_counters[i].idx = i; | ||||
| 		pmu->gp_counters[i].current_config = 0; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| @ -306,8 +315,9 @@ struct kvm_pmu_ops amd_pmu_ops = { | ||||
| 	.find_fixed_event = amd_find_fixed_event, | ||||
| 	.pmc_is_enabled = amd_pmc_is_enabled, | ||||
| 	.pmc_idx_to_pmc = amd_pmc_idx_to_pmc, | ||||
| 	.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc, | ||||
| 	.msr_idx_to_pmc = amd_msr_idx_to_pmc, | ||||
| 	.is_valid_msr_idx = amd_is_valid_msr_idx, | ||||
| 	.is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx, | ||||
| 	.is_valid_msr = amd_is_valid_msr, | ||||
| 	.get_msr = amd_pmu_get_msr, | ||||
| 	.set_msr = amd_pmu_set_msr, | ||||
|  | ||||
| @ -38,6 +38,7 @@ | ||||
| #include <linux/file.h> | ||||
| #include <linux/pagemap.h> | ||||
| #include <linux/swap.h> | ||||
| #include <linux/rwsem.h> | ||||
| 
 | ||||
| #include <asm/apic.h> | ||||
| #include <asm/perf_event.h> | ||||
| @ -418,9 +419,13 @@ enum { | ||||
| 
 | ||||
| #define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL | ||||
| 
 | ||||
| static int sev_flush_asids(void); | ||||
| static DECLARE_RWSEM(sev_deactivate_lock); | ||||
| static DEFINE_MUTEX(sev_bitmap_lock); | ||||
| static unsigned int max_sev_asid; | ||||
| static unsigned int min_sev_asid; | ||||
| static unsigned long *sev_asid_bitmap; | ||||
| static unsigned long *sev_reclaim_asid_bitmap; | ||||
| #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) | ||||
| 
 | ||||
| struct enc_region { | ||||
| @ -1235,11 +1240,15 @@ static __init int sev_hardware_setup(void) | ||||
| 	/* Minimum ASID value that should be used for SEV guest */ | ||||
| 	min_sev_asid = cpuid_edx(0x8000001F); | ||||
| 
 | ||||
| 	/* Initialize SEV ASID bitmap */ | ||||
| 	/* Initialize SEV ASID bitmaps */ | ||||
| 	sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); | ||||
| 	if (!sev_asid_bitmap) | ||||
| 		return 1; | ||||
| 
 | ||||
| 	sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); | ||||
| 	if (!sev_reclaim_asid_bitmap) | ||||
| 		return 1; | ||||
| 
 | ||||
| 	status = kmalloc(sizeof(*status), GFP_KERNEL); | ||||
| 	if (!status) | ||||
| 		return 1; | ||||
| @ -1418,8 +1427,12 @@ static __exit void svm_hardware_unsetup(void) | ||||
| { | ||||
| 	int cpu; | ||||
| 
 | ||||
| 	if (svm_sev_enabled()) | ||||
| 	if (svm_sev_enabled()) { | ||||
| 		bitmap_free(sev_asid_bitmap); | ||||
| 		bitmap_free(sev_reclaim_asid_bitmap); | ||||
| 
 | ||||
| 		sev_flush_asids(); | ||||
| 	} | ||||
| 
 | ||||
| 	for_each_possible_cpu(cpu) | ||||
| 		svm_cpu_uninit(cpu); | ||||
| @ -1729,25 +1742,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void __sev_asid_free(int asid) | ||||
| static void sev_asid_free(int asid) | ||||
| { | ||||
| 	struct svm_cpu_data *sd; | ||||
| 	int cpu, pos; | ||||
| 
 | ||||
| 	mutex_lock(&sev_bitmap_lock); | ||||
| 
 | ||||
| 	pos = asid - 1; | ||||
| 	clear_bit(pos, sev_asid_bitmap); | ||||
| 	__set_bit(pos, sev_reclaim_asid_bitmap); | ||||
| 
 | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		sd = per_cpu(svm_data, cpu); | ||||
| 		sd->sev_vmcbs[pos] = NULL; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void sev_asid_free(struct kvm *kvm) | ||||
| { | ||||
| 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; | ||||
| 
 | ||||
| 	__sev_asid_free(sev->asid); | ||||
| 	mutex_unlock(&sev_bitmap_lock); | ||||
| } | ||||
| 
 | ||||
| static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) | ||||
| @ -1764,10 +1774,12 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) | ||||
| 
 | ||||
| 	/* deactivate handle */ | ||||
| 	data->handle = handle; | ||||
| 	sev_guest_deactivate(data, NULL); | ||||
| 
 | ||||
| 	wbinvd_on_all_cpus(); | ||||
| 	sev_guest_df_flush(NULL); | ||||
| 	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ | ||||
| 	down_read(&sev_deactivate_lock); | ||||
| 	sev_guest_deactivate(data, NULL); | ||||
| 	up_read(&sev_deactivate_lock); | ||||
| 
 | ||||
| 	kfree(data); | ||||
| 
 | ||||
| 	decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); | ||||
| @ -1916,7 +1928,7 @@ static void sev_vm_destroy(struct kvm *kvm) | ||||
| 	mutex_unlock(&kvm->lock); | ||||
| 
 | ||||
| 	sev_unbind_asid(kvm, sev->handle); | ||||
| 	sev_asid_free(kvm); | ||||
| 	sev_asid_free(sev->asid); | ||||
| } | ||||
| 
 | ||||
| static void avic_vm_destroy(struct kvm *kvm) | ||||
| @ -2370,7 +2382,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | ||||
| 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); | ||||
| 		break; | ||||
| 	default: | ||||
| 		BUG(); | ||||
| 		WARN_ON_ONCE(1); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| @ -2523,10 +2535,6 @@ static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| static void svm_decache_cr3(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| } | ||||
| @ -4997,6 +5005,18 @@ static int handle_exit(struct kvm_vcpu *vcpu) | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| #ifdef CONFIG_RETPOLINE | ||||
| 	if (exit_code == SVM_EXIT_MSR) | ||||
| 		return msr_interception(svm); | ||||
| 	else if (exit_code == SVM_EXIT_VINTR) | ||||
| 		return interrupt_window_interception(svm); | ||||
| 	else if (exit_code == SVM_EXIT_INTR) | ||||
| 		return intr_interception(svm); | ||||
| 	else if (exit_code == SVM_EXIT_HLT) | ||||
| 		return halt_interception(svm); | ||||
| 	else if (exit_code == SVM_EXIT_NPF) | ||||
| 		return npf_interception(svm); | ||||
| #endif | ||||
| 	return svm_exit_handlers[exit_code](svm); | ||||
| } | ||||
| 
 | ||||
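The CONFIG_RETPOLINE block added above short-circuits the indirect call through svm_exit_handlers[] for the most frequent exit reasons, since retpolines make indirect branches comparatively expensive. A compact sketch of the idea, with invented exit codes, handlers, and a stand-in macro in place of CONFIG_RETPOLINE:

#include <stdio.h>

enum { EXIT_MSR = 1, EXIT_HLT = 2, EXIT_OTHER = 3 };

static int handle_msr(int code)   { printf("msr exit\n");   return code; }
static int handle_hlt(int code)   { printf("hlt exit\n");   return code; }
static int handle_other(int code) { printf("other exit\n"); return code; }

typedef int (*handler_t)(int);
static handler_t handlers[] = {
	[EXIT_MSR]   = handle_msr,
	[EXIT_HLT]   = handle_hlt,
	[EXIT_OTHER] = handle_other,
};

static int dispatch(int code)
{
#ifdef AVOID_INDIRECT_CALLS
	/* Hot exits take a direct, predictable branch... */
	if (code == EXIT_MSR)
		return handle_msr(code);
	if (code == EXIT_HLT)
		return handle_hlt(code);
#endif
	/* ...everything else still goes through the table. */
	return handlers[code](code);
}

int main(void)
{
	dispatch(EXIT_MSR);
	dispatch(EXIT_OTHER);
	return 0;
}
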
| @ -5092,8 +5112,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | ||||
| { | ||||
| 	struct vcpu_svm *svm = to_svm(vcpu); | ||||
| 
 | ||||
| 	if (svm_nested_virtualize_tpr(vcpu) || | ||||
| 	    kvm_vcpu_apicv_active(vcpu)) | ||||
| 	if (svm_nested_virtualize_tpr(vcpu)) | ||||
| 		return; | ||||
| 
 | ||||
| 	clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); | ||||
| @ -5110,9 +5129,9 @@ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) | ||||
| 	return; | ||||
| } | ||||
| 
 | ||||
| static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu) | ||||
| static bool svm_get_enable_apicv(struct kvm *kvm) | ||||
| { | ||||
| 	return avic && irqchip_split(vcpu->kvm); | ||||
| 	return avic && irqchip_split(kvm); | ||||
| } | ||||
| 
 | ||||
| static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | ||||
| @ -5634,7 +5653,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | ||||
| 	svm->vmcb->save.cr2 = vcpu->arch.cr2; | ||||
| 
 | ||||
| 	clgi(); | ||||
| 	kvm_load_guest_xcr0(vcpu); | ||||
| 	kvm_load_guest_xsave_state(vcpu); | ||||
| 
 | ||||
| 	if (lapic_in_kernel(vcpu) && | ||||
| 		vcpu->arch.apic->lapic_timer.timer_advance_ns) | ||||
| @ -5784,7 +5803,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | ||||
| 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | ||||
| 		kvm_before_interrupt(&svm->vcpu); | ||||
| 
 | ||||
| 	kvm_put_guest_xcr0(vcpu); | ||||
| 	kvm_load_host_xsave_state(vcpu); | ||||
| 	stgi(); | ||||
| 
 | ||||
| 	/* Any pending NMI will happen here */ | ||||
| @ -5893,6 +5912,9 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct vcpu_svm *svm = to_svm(vcpu); | ||||
| 
 | ||||
| 	vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | ||||
| 				    boot_cpu_has(X86_FEATURE_XSAVES); | ||||
| 
 | ||||
| 	/* Update nrips enabled cache */ | ||||
| 	svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); | ||||
| 
 | ||||
| @ -5936,13 +5958,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||||
| 		if (npt_enabled) | ||||
| 			entry->edx |= F(NPT); | ||||
| 
 | ||||
| 		break; | ||||
| 	case 0x8000001F: | ||||
| 		/* Support memory encryption cpuid if host supports it */ | ||||
| 		if (boot_cpu_has(X86_FEATURE_SEV)) | ||||
| 			cpuid(0x8000001f, &entry->eax, &entry->ebx, | ||||
| 				&entry->ecx, &entry->edx); | ||||
| 
 | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| @ -5968,7 +5983,7 @@ static bool svm_mpx_supported(void) | ||||
| 
 | ||||
| static bool svm_xsaves_supported(void) | ||||
| { | ||||
| 	return false; | ||||
| 	return boot_cpu_has(X86_FEATURE_XSAVES); | ||||
| } | ||||
| 
 | ||||
| static bool svm_umip_emulated(void) | ||||
| @ -6270,18 +6285,73 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int sev_asid_new(void) | ||||
| static int sev_flush_asids(void) | ||||
| { | ||||
| 	int ret, error; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail, | ||||
| 	 * so it must be guarded. | ||||
| 	 */ | ||||
| 	down_write(&sev_deactivate_lock); | ||||
| 
 | ||||
| 	wbinvd_on_all_cpus(); | ||||
| 	ret = sev_guest_df_flush(&error); | ||||
| 
 | ||||
| 	up_write(&sev_deactivate_lock); | ||||
| 
 | ||||
| 	if (ret) | ||||
| 		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error); | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /* Must be called with the sev_bitmap_lock held */ | ||||
| static bool __sev_recycle_asids(void) | ||||
| { | ||||
| 	int pos; | ||||
| 
 | ||||
| 	/* Check if there are any ASIDs to reclaim before performing a flush */ | ||||
| 	pos = find_next_bit(sev_reclaim_asid_bitmap, | ||||
| 			    max_sev_asid, min_sev_asid - 1); | ||||
| 	if (pos >= max_sev_asid) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (sev_flush_asids()) | ||||
| 		return false; | ||||
| 
 | ||||
| 	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, | ||||
| 		   max_sev_asid); | ||||
| 	bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); | ||||
| 
 | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| static int sev_asid_new(void) | ||||
| { | ||||
| 	bool retry = true; | ||||
| 	int pos; | ||||
| 
 | ||||
| 	mutex_lock(&sev_bitmap_lock); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * SEV-enabled guests must use ASIDs from min_sev_asid to max_sev_asid. | ||||
| 	 */ | ||||
| again: | ||||
| 	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); | ||||
| 	if (pos >= max_sev_asid) | ||||
| 	if (pos >= max_sev_asid) { | ||||
| 		if (retry && __sev_recycle_asids()) { | ||||
| 			retry = false; | ||||
| 			goto again; | ||||
| 		} | ||||
| 		mutex_unlock(&sev_bitmap_lock); | ||||
| 		return -EBUSY; | ||||
| 	} | ||||
| 
 | ||||
| 	__set_bit(pos, sev_asid_bitmap); | ||||
| 
 | ||||
| 	mutex_unlock(&sev_bitmap_lock); | ||||
| 
 | ||||
| 	set_bit(pos, sev_asid_bitmap); | ||||
| 	return pos + 1; | ||||
| } | ||||
| 
 | ||||
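sev_asid_new() above now retries exactly once after recycling: freed ASIDs are parked in a reclaim bitmap and only returned to the free pool after a successful flush. A self-contained model of that allocate/recycle loop, with a tiny 1..8 ASID range and the WBINVD/DF_FLUSH step reduced to a print (both are assumptions of the sketch, not the driver's behaviour):

#include <stdbool.h>
#include <stdio.h>

#define MAX_ASID 8
static bool used[MAX_ASID + 1];		/* valid ASIDs are 1..MAX_ASID */
static bool reclaim[MAX_ASID + 1];

static bool recycle(void)
{
	bool any = false;

	for (int i = 1; i <= MAX_ASID; i++)
		if (reclaim[i])
			any = true;
	if (!any)
		return false;

	printf("flush caches, then return reclaimed ASIDs to the free pool\n");
	for (int i = 1; i <= MAX_ASID; i++) {
		if (reclaim[i]) {
			used[i] = false;
			reclaim[i] = false;
		}
	}
	return true;
}

static int asid_new(void)
{
	bool retried = false;

again:
	for (int i = 1; i <= MAX_ASID; i++) {
		if (!used[i]) {
			used[i] = true;
			return i;
		}
	}
	if (!retried && recycle()) {
		retried = true;
		goto again;
	}
	return -1;	/* the kernel returns -EBUSY here */
}

static void asid_free(int asid)
{
	reclaim[asid] = true;	/* not reusable until the next flush */
}

int main(void)
{
	for (int i = 0; i < MAX_ASID; i++)
		asid_new();		/* exhaust the pool */
	asid_free(3);
	printf("after recycling, got ASID %d\n", asid_new());
	return 0;
}
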
| @ -6309,7 +6379,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) | ||||
| 	return 0; | ||||
| 
 | ||||
| e_free: | ||||
| 	__sev_asid_free(asid); | ||||
| 	sev_asid_free(asid); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| @ -6319,12 +6389,6 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) | ||||
| 	int asid = sev_get_asid(kvm); | ||||
| 	int ret; | ||||
| 
 | ||||
| 	wbinvd_on_all_cpus(); | ||||
| 
 | ||||
| 	ret = sev_guest_df_flush(error); | ||||
| 	if (ret) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); | ||||
| 	if (!data) | ||||
| 		return -ENOMEM; | ||||
| @ -7214,7 +7278,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | ||||
| 	.get_cpl = svm_get_cpl, | ||||
| 	.get_cs_db_l_bits = kvm_get_cs_db_l_bits, | ||||
| 	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits, | ||||
| 	.decache_cr3 = svm_decache_cr3, | ||||
| 	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits, | ||||
| 	.set_cr0 = svm_set_cr0, | ||||
| 	.set_cr3 = svm_set_cr3, | ||||
|  | ||||
| @ -10,6 +10,7 @@ | ||||
| #include "hyperv.h" | ||||
| #include "mmu.h" | ||||
| #include "nested.h" | ||||
| #include "pmu.h" | ||||
| #include "trace.h" | ||||
| #include "x86.h" | ||||
| 
 | ||||
| @ -27,6 +28,16 @@ module_param(nested_early_check, bool, S_IRUGO); | ||||
| 	failed;								\ | ||||
| }) | ||||
| 
 | ||||
| #define SET_MSR_OR_WARN(vcpu, idx, data)				\ | ||||
| ({									\ | ||||
| 	bool failed = kvm_set_msr(vcpu, idx, data);			\ | ||||
| 	if (failed)							\ | ||||
| 		pr_warn_ratelimited(					\ | ||||
| 				"%s cannot write MSR (0x%x, 0x%llx)\n",	\ | ||||
| 				__func__, idx, data);			\ | ||||
| 	failed;								\ | ||||
| }) | ||||
| 
 | ||||
| /*
 | ||||
|  * Hyper-V requires all of these, so mark them as supported even though | ||||
|  * they are just treated the same as all-context. | ||||
| @ -257,7 +268,7 @@ static void free_nested(struct kvm_vcpu *vcpu) | ||||
| 	vmx->nested.cached_shadow_vmcs12 = NULL; | ||||
| 	/* Unpin physical memory we referred to in the vmcs02 */ | ||||
| 	if (vmx->nested.apic_access_page) { | ||||
| 		kvm_release_page_dirty(vmx->nested.apic_access_page); | ||||
| 		kvm_release_page_clean(vmx->nested.apic_access_page); | ||||
| 		vmx->nested.apic_access_page = NULL; | ||||
| 	} | ||||
| 	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); | ||||
| @ -929,6 +940,57 @@ fail: | ||||
| 	return i + 1; | ||||
| } | ||||
| 
 | ||||
| static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, | ||||
| 					    u32 msr_index, | ||||
| 					    u64 *data) | ||||
| { | ||||
| 	struct vcpu_vmx *vmx = to_vmx(vcpu); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the L0 hypervisor stored a more accurate value for the TSC that | ||||
| 	 * does not include the time taken for emulation of the L2->L1 | ||||
| 	 * VM-exit in L0, use the more accurate value. | ||||
| 	 */ | ||||
| 	if (msr_index == MSR_IA32_TSC) { | ||||
| 		int index = vmx_find_msr_index(&vmx->msr_autostore.guest, | ||||
| 					       MSR_IA32_TSC); | ||||
| 
 | ||||
| 		if (index >= 0) { | ||||
| 			u64 val = vmx->msr_autostore.guest.val[index].value; | ||||
| 
 | ||||
| 			*data = kvm_read_l1_tsc(vcpu, val); | ||||
| 			return true; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (kvm_get_msr(vcpu, msr_index, data)) { | ||||
| 		pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, | ||||
| 			msr_index); | ||||
| 		return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i, | ||||
| 				     struct vmx_msr_entry *e) | ||||
| { | ||||
| 	if (kvm_vcpu_read_guest(vcpu, | ||||
| 				gpa + i * sizeof(*e), | ||||
| 				e, 2 * sizeof(u32))) { | ||||
| 		pr_debug_ratelimited( | ||||
| 			"%s cannot read MSR entry (%u, 0x%08llx)\n", | ||||
| 			__func__, i, gpa + i * sizeof(*e)); | ||||
| 		return false; | ||||
| 	} | ||||
| 	if (nested_vmx_store_msr_check(vcpu, e)) { | ||||
| 		pr_debug_ratelimited( | ||||
| 			"%s check failed (%u, 0x%x, 0x%x)\n", | ||||
| 			__func__, i, e->index, e->reserved); | ||||
| 		return false; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) | ||||
| { | ||||
| 	u64 data; | ||||
| @ -940,26 +1002,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) | ||||
| 		if (unlikely(i >= max_msr_list_size)) | ||||
| 			return -EINVAL; | ||||
| 
 | ||||
| 		if (kvm_vcpu_read_guest(vcpu, | ||||
| 					gpa + i * sizeof(e), | ||||
| 					&e, 2 * sizeof(u32))) { | ||||
| 			pr_debug_ratelimited( | ||||
| 				"%s cannot read MSR entry (%u, 0x%08llx)\n", | ||||
| 				__func__, i, gpa + i * sizeof(e)); | ||||
| 		if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) | ||||
| 			return -EINVAL; | ||||
| 		} | ||||
| 		if (nested_vmx_store_msr_check(vcpu, &e)) { | ||||
| 			pr_debug_ratelimited( | ||||
| 				"%s check failed (%u, 0x%x, 0x%x)\n", | ||||
| 				__func__, i, e.index, e.reserved); | ||||
| 
 | ||||
| 		if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data)) | ||||
| 			return -EINVAL; | ||||
| 		} | ||||
| 		if (kvm_get_msr(vcpu, e.index, &data)) { | ||||
| 			pr_debug_ratelimited( | ||||
| 				"%s cannot read MSR (%u, 0x%x)\n", | ||||
| 				__func__, i, e.index); | ||||
| 			return -EINVAL; | ||||
| 		} | ||||
| 
 | ||||
| 		if (kvm_vcpu_write_guest(vcpu, | ||||
| 					 gpa + i * sizeof(e) + | ||||
| 					     offsetof(struct vmx_msr_entry, value), | ||||
| @ -973,6 +1021,60 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index) | ||||
| { | ||||
| 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||||
| 	u32 count = vmcs12->vm_exit_msr_store_count; | ||||
| 	u64 gpa = vmcs12->vm_exit_msr_store_addr; | ||||
| 	struct vmx_msr_entry e; | ||||
| 	u32 i; | ||||
| 
 | ||||
| 	for (i = 0; i < count; i++) { | ||||
| 		if (!read_and_check_msr_entry(vcpu, gpa, i, &e)) | ||||
| 			return false; | ||||
| 
 | ||||
| 		if (e.index == msr_index) | ||||
| 			return true; | ||||
| 	} | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, | ||||
| 					   u32 msr_index) | ||||
| { | ||||
| 	struct vcpu_vmx *vmx = to_vmx(vcpu); | ||||
| 	struct vmx_msrs *autostore = &vmx->msr_autostore.guest; | ||||
| 	bool in_vmcs12_store_list; | ||||
| 	int msr_autostore_index; | ||||
| 	bool in_autostore_list; | ||||
| 	int last; | ||||
| 
 | ||||
| 	msr_autostore_index = vmx_find_msr_index(autostore, msr_index); | ||||
| 	in_autostore_list = msr_autostore_index >= 0; | ||||
| 	in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index); | ||||
| 
 | ||||
| 	if (in_vmcs12_store_list && !in_autostore_list) { | ||||
| 		if (autostore->nr == NR_LOADSTORE_MSRS) { | ||||
| 			/*
 | ||||
| 			 * Emulated VMEntry does not fail here.  Instead a less | ||||
| 			 * accurate value will be returned by | ||||
| 			 * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() | ||||
| 			 * instead of reading the value from the vmcs02 VMExit | ||||
| 			 * MSR-store area. | ||||
| 			 */ | ||||
| 			pr_warn_ratelimited( | ||||
| 				"Not enough msr entries in msr_autostore.  Can't add msr %x\n", | ||||
| 				msr_index); | ||||
| 			return; | ||||
| 		} | ||||
| 		last = autostore->nr++; | ||||
| 		autostore->val[last].index = msr_index; | ||||
| 	} else if (!in_vmcs12_store_list && in_autostore_list) { | ||||
| 		last = --autostore->nr; | ||||
| 		autostore->val[msr_autostore_index] = autostore->val[last]; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
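prepare_vmx_msr_autostore_list() above keeps a small fixed-size array in sync with whether an MSR appears in the vmcs12 VM-exit MSR-store list: append when the MSR becomes needed, and remove by swapping in the last element, since ordering does not matter. The same bookkeeping in a self-contained sketch, with a capacity of four chosen only for the example:

#include <stdio.h>

#define CAP 4
static unsigned int list[CAP];
static int nr;

static int find(unsigned int msr)
{
	for (int i = 0; i < nr; i++)
		if (list[i] == msr)
			return i;
	return -1;
}

static void sync_entry(unsigned int msr, int wanted)
{
	int i = find(msr);

	if (wanted && i < 0) {
		if (nr == CAP) {		/* mirrors the "not enough entries" warning */
			printf("no room for MSR %#x\n", msr);
			return;
		}
		list[nr++] = msr;
	} else if (!wanted && i >= 0) {
		list[i] = list[--nr];		/* swap-remove: order does not matter */
	}
}

int main(void)
{
	sync_entry(0x10, 1);	/* becomes needed: appended */
	sync_entry(0x20, 1);
	sync_entry(0x10, 0);	/* no longer needed: 0x20 swapped into its slot */
	printf("%d entries, first is %#x\n", nr, list[0]);
	return 0;
}
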
| static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| { | ||||
| 	unsigned long invalid_mask; | ||||
| @ -1012,7 +1114,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne | ||||
| 		kvm_mmu_new_cr3(vcpu, cr3, false); | ||||
| 
 | ||||
| 	vcpu->arch.cr3 = cr3; | ||||
| 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||||
| 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); | ||||
| 
 | ||||
| 	kvm_init_mmu(vcpu, false); | ||||
| 
 | ||||
| @ -1024,7 +1126,9 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne | ||||
|  * populated by L2 differently than TLB entries populated | ||||
|  * by L1. | ||||
|  * | ||||
|  * If L1 uses EPT, then TLB entries are tagged with different EPTP. | ||||
|  * If L0 uses EPT, L1 and L2 run with different EPTP because | ||||
|  * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries | ||||
|  * are tagged with different EPTP. | ||||
|  * | ||||
|  * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged | ||||
|  * with different VPID (L1 entries are tagged with vmx->vpid | ||||
| @ -1034,7 +1138,7 @@ static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||||
| 
 | ||||
| 	return nested_cpu_has_ept(vmcs12) || | ||||
| 	return enable_ept || | ||||
| 	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02); | ||||
| } | ||||
| 
 | ||||
| @ -2018,7 +2122,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) | ||||
| 	 * addresses are constant (for vmcs02), the counts can change based | ||||
| 	 * on L2's behavior, e.g. switching to/from long mode. | ||||
| 	 */ | ||||
| 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | ||||
| 	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val)); | ||||
| 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); | ||||
| 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); | ||||
| 
 | ||||
| @ -2073,6 +2177,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | ||||
| 	exec_control &= ~CPU_BASED_TPR_SHADOW; | ||||
| 	exec_control |= vmcs12->cpu_based_vm_exec_control; | ||||
| 
 | ||||
| 	vmx->nested.l1_tpr_threshold = -1; | ||||
| 	if (exec_control & CPU_BASED_TPR_SHADOW) | ||||
| 		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); | ||||
| #ifdef CONFIG_X86_64 | ||||
| @ -2285,6 +2390,13 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) | ||||
| 		vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Make sure the msr_autostore list is up to date before we set the | ||||
| 	 * count in the vmcs02. | ||||
| 	 */ | ||||
| 	prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC); | ||||
| 
 | ||||
| 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr); | ||||
| 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); | ||||
| 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); | ||||
| 
 | ||||
| @ -2381,9 +2493,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | ||||
| 
 | ||||
| 	if (nested_cpu_has_ept(vmcs12)) | ||||
| 		nested_ept_init_mmu_context(vcpu); | ||||
| 	else if (nested_cpu_has2(vmcs12, | ||||
| 				 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | ||||
| 		vmx_flush_tlb(vcpu, true); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those | ||||
| @ -2418,6 +2527,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | ||||
| 				entry_failure_code)) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Immediately write vmcs02.GUEST_CR3.  It will be propagated to vmcs12 | ||||
| 	 * on nested VM-Exit, which can occur without actually running L2 and | ||||
| 	 * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with | ||||
| 	 * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the | ||||
| 	 * transition to HLT instead of running L2. | ||||
| 	 */ | ||||
| 	if (enable_ept) | ||||
| 		vmcs_writel(GUEST_CR3, vmcs12->guest_cr3); | ||||
| 
 | ||||
| 	/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */ | ||||
| 	if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) && | ||||
| 	    is_pae_paging(vcpu)) { | ||||
| @ -2430,6 +2549,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | ||||
| 	if (!enable_ept) | ||||
| 		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; | ||||
| 
 | ||||
| 	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && | ||||
| 	    SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, | ||||
| 			    vmcs12->guest_ia32_perf_global_ctrl)) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	kvm_rsp_write(vcpu, vmcs12->guest_rsp); | ||||
| 	kvm_rip_write(vcpu, vmcs12->guest_rip); | ||||
| 	return 0; | ||||
| @ -2664,6 +2788,11 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, | ||||
| 	    CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && | ||||
| 	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), | ||||
| 					   vmcs12->host_ia32_perf_global_ctrl))) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| #ifdef CONFIG_X86_64 | ||||
| 	ia32e = !!(vcpu->arch.efer & EFER_LMA); | ||||
| #else | ||||
| @ -2779,6 +2908,11 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && | ||||
| 	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu), | ||||
| 					   vmcs12->guest_ia32_perf_global_ctrl))) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the load IA32_EFER VM-entry control is 1, the following checks | ||||
| 	 * are performed on the field for the IA32_EFER MSR: | ||||
| @ -2933,7 +3067,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) | ||||
| 		 * to it so we can release it later. | ||||
| 		 */ | ||||
| 		if (vmx->nested.apic_access_page) { /* shouldn't happen */ | ||||
| 			kvm_release_page_dirty(vmx->nested.apic_access_page); | ||||
| 			kvm_release_page_clean(vmx->nested.apic_access_page); | ||||
| 			vmx->nested.apic_access_page = NULL; | ||||
| 		} | ||||
| 		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); | ||||
| @ -3461,6 +3595,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) | ||||
| 		test_bit(KVM_APIC_INIT, &apic->pending_events)) { | ||||
| 		if (block_nested_events) | ||||
| 			return -EBUSY; | ||||
| 		clear_bit(KVM_APIC_INIT, &apic->pending_events); | ||||
| 		nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); | ||||
| 		return 0; | ||||
| 	} | ||||
| @ -3864,8 +3999,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | ||||
| 		vcpu->arch.pat = vmcs12->host_ia32_pat; | ||||
| 	} | ||||
| 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | ||||
| 		vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, | ||||
| 			vmcs12->host_ia32_perf_global_ctrl); | ||||
| 		SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, | ||||
| 				vmcs12->host_ia32_perf_global_ctrl); | ||||
| 
 | ||||
| 	/* Set L1 segment info according to Intel SDM
 | ||||
| 	    27.5.2 Loading Host Segment and Descriptor-Table Registers */ | ||||
| @ -3984,7 +4119,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) | ||||
| 
 | ||||
| 	nested_ept_uninit_mmu_context(vcpu); | ||||
| 	vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||||
| 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||||
| 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs | ||||
| @ -4112,6 +4247,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | ||||
| 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); | ||||
| 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); | ||||
| 	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); | ||||
| 	if (vmx->nested.l1_tpr_threshold != -1) | ||||
| 		vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold); | ||||
| 
 | ||||
| 	if (kvm_has_tsc_control) | ||||
| 		decache_tsc_multiplier(vmx); | ||||
| @ -4119,15 +4256,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | ||||
| 	if (vmx->nested.change_vmcs01_virtual_apic_mode) { | ||||
| 		vmx->nested.change_vmcs01_virtual_apic_mode = false; | ||||
| 		vmx_set_virtual_apic_mode(vcpu); | ||||
| 	} else if (!nested_cpu_has_ept(vmcs12) && | ||||
| 		   nested_cpu_has2(vmcs12, | ||||
| 				   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | ||||
| 		vmx_flush_tlb(vcpu, true); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Unpin physical memory we referred to in vmcs02 */ | ||||
| 	if (vmx->nested.apic_access_page) { | ||||
| 		kvm_release_page_dirty(vmx->nested.apic_access_page); | ||||
| 		kvm_release_page_clean(vmx->nested.apic_access_page); | ||||
| 		vmx->nested.apic_access_page = NULL; | ||||
| 	} | ||||
| 	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); | ||||
| @ -4327,6 +4460,27 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct vcpu_vmx *vmx; | ||||
| 
 | ||||
| 	if (!nested_vmx_allowed(vcpu)) | ||||
| 		return; | ||||
| 
 | ||||
| 	vmx = to_vmx(vcpu); | ||||
| 	if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) { | ||||
| 		vmx->nested.msrs.entry_ctls_high |= | ||||
| 				VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; | ||||
| 		vmx->nested.msrs.exit_ctls_high |= | ||||
| 				VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; | ||||
| 	} else { | ||||
| 		vmx->nested.msrs.entry_ctls_high &= | ||||
| 				~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; | ||||
| 		vmx->nested.msrs.exit_ctls_high &= | ||||
| 				~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) | ||||
| { | ||||
| 	gva_t gva; | ||||
| @ -5766,7 +5920,7 @@ error_guest_mode: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| void nested_vmx_vcpu_setup(void) | ||||
| void nested_vmx_set_vmcs_shadowing_bitmap(void) | ||||
| { | ||||
| 	if (enable_shadow_vmcs) { | ||||
| 		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); | ||||
| @ -6047,23 +6201,23 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) | ||||
| 		init_vmcs_shadow_fields(); | ||||
| 	} | ||||
| 
 | ||||
| 	exit_handlers[EXIT_REASON_VMCLEAR]	= handle_vmclear, | ||||
| 	exit_handlers[EXIT_REASON_VMLAUNCH]	= handle_vmlaunch, | ||||
| 	exit_handlers[EXIT_REASON_VMPTRLD]	= handle_vmptrld, | ||||
| 	exit_handlers[EXIT_REASON_VMPTRST]	= handle_vmptrst, | ||||
| 	exit_handlers[EXIT_REASON_VMREAD]	= handle_vmread, | ||||
| 	exit_handlers[EXIT_REASON_VMRESUME]	= handle_vmresume, | ||||
| 	exit_handlers[EXIT_REASON_VMWRITE]	= handle_vmwrite, | ||||
| 	exit_handlers[EXIT_REASON_VMOFF]	= handle_vmoff, | ||||
| 	exit_handlers[EXIT_REASON_VMON]		= handle_vmon, | ||||
| 	exit_handlers[EXIT_REASON_INVEPT]	= handle_invept, | ||||
| 	exit_handlers[EXIT_REASON_INVVPID]	= handle_invvpid, | ||||
| 	exit_handlers[EXIT_REASON_VMFUNC]	= handle_vmfunc, | ||||
| 	exit_handlers[EXIT_REASON_VMCLEAR]	= handle_vmclear; | ||||
| 	exit_handlers[EXIT_REASON_VMLAUNCH]	= handle_vmlaunch; | ||||
| 	exit_handlers[EXIT_REASON_VMPTRLD]	= handle_vmptrld; | ||||
| 	exit_handlers[EXIT_REASON_VMPTRST]	= handle_vmptrst; | ||||
| 	exit_handlers[EXIT_REASON_VMREAD]	= handle_vmread; | ||||
| 	exit_handlers[EXIT_REASON_VMRESUME]	= handle_vmresume; | ||||
| 	exit_handlers[EXIT_REASON_VMWRITE]	= handle_vmwrite; | ||||
| 	exit_handlers[EXIT_REASON_VMOFF]	= handle_vmoff; | ||||
| 	exit_handlers[EXIT_REASON_VMON]		= handle_vmon; | ||||
| 	exit_handlers[EXIT_REASON_INVEPT]	= handle_invept; | ||||
| 	exit_handlers[EXIT_REASON_INVVPID]	= handle_invvpid; | ||||
| 	exit_handlers[EXIT_REASON_VMFUNC]	= handle_vmfunc; | ||||
| 
 | ||||
| 	kvm_x86_ops->check_nested_events = vmx_check_nested_events; | ||||
| 	kvm_x86_ops->get_nested_state = vmx_get_nested_state; | ||||
| 	kvm_x86_ops->set_nested_state = vmx_set_nested_state; | ||||
| 	kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages, | ||||
| 	kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages; | ||||
| 	kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs; | ||||
| 	kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version; | ||||
| 
 | ||||
|  | ||||
| @ -21,7 +21,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, | ||||
| 				bool apicv); | ||||
| void nested_vmx_hardware_unsetup(void); | ||||
| __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); | ||||
| void nested_vmx_vcpu_setup(void); | ||||
| void nested_vmx_set_vmcs_shadowing_bitmap(void); | ||||
| void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); | ||||
| enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, | ||||
| 						     bool from_vmentry); | ||||
| @ -33,6 +33,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | ||||
| int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); | ||||
| int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, | ||||
| 			u32 vmx_instruction_info, bool wr, int len, gva_t *ret); | ||||
| void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu); | ||||
| 
 | ||||
| static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| @ -256,7 +257,7 @@ static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1) | ||||
| 	return ((val & fixed1) | fixed0) == val; | ||||
| } | ||||
| 
 | ||||
| static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| static inline bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| { | ||||
| 	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; | ||||
| 	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; | ||||
| @ -270,7 +271,7 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| 	return fixed_bits_valid(val, fixed0, fixed1); | ||||
| } | ||||
| 
 | ||||
| static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| static inline bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| { | ||||
| 	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; | ||||
| 	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; | ||||
| @ -278,7 +279,7 @@ static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| 	return fixed_bits_valid(val, fixed0, fixed1); | ||||
| } | ||||
| 
 | ||||
| static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| { | ||||
| 	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; | ||||
| 	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; | ||||
|  | ||||
| @ -15,6 +15,7 @@ | ||||
| #include "x86.h" | ||||
| #include "cpuid.h" | ||||
| #include "lapic.h" | ||||
| #include "nested.h" | ||||
| #include "pmu.h" | ||||
| 
 | ||||
| static struct kvm_event_hw_type_mapping intel_arch_events[] = { | ||||
| @ -46,6 +47,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) | ||||
| 		if (old_ctrl == new_ctrl) | ||||
| 			continue; | ||||
| 
 | ||||
| 		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use); | ||||
| 		reprogram_fixed_counter(pmc, new_ctrl, i); | ||||
| 	} | ||||
| 
 | ||||
| @ -111,7 +113,7 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) | ||||
| } | ||||
| 
 | ||||
| /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ | ||||
| static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | ||||
| static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 	bool fixed = idx & (1u << 30); | ||||
| @ -122,8 +124,8 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | ||||
| 		(fixed && idx >= pmu->nr_arch_fixed_counters); | ||||
| } | ||||
| 
 | ||||
| static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, | ||||
| 					    unsigned idx, u64 *mask) | ||||
| static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, | ||||
| 					    unsigned int idx, u64 *mask) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 	bool fixed = idx & (1u << 30); | ||||
| @ -162,6 +164,18 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 	struct kvm_pmc *pmc; | ||||
| 
 | ||||
| 	pmc = get_fixed_pmc(pmu, msr); | ||||
| 	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0); | ||||
| 	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0); | ||||
| 
 | ||||
| 	return pmc; | ||||
| } | ||||
| 
 | ||||
| static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| @ -223,7 +237,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||||
| 	case MSR_CORE_PERF_GLOBAL_CTRL: | ||||
| 		if (pmu->global_ctrl == data) | ||||
| 			return 0; | ||||
| 		if (!(data & pmu->global_ctrl_mask)) { | ||||
| 		if (kvm_valid_perf_global_ctrl(pmu, data)) { | ||||
| 			global_ctrl_changed(pmu, data); | ||||
| 			return 0; | ||||
| 		} | ||||
| @ -317,6 +331,13 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) | ||||
| 	    (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && | ||||
| 	    (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) | ||||
| 		pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; | ||||
| 
 | ||||
| 	bitmap_set(pmu->all_valid_pmc_idx, | ||||
| 		0, pmu->nr_arch_gp_counters); | ||||
| 	bitmap_set(pmu->all_valid_pmc_idx, | ||||
| 		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); | ||||
| 
 | ||||
| 	nested_vmx_pmu_entry_exit_ctls_update(vcpu); | ||||
| } | ||||
| 
 | ||||
| static void intel_pmu_init(struct kvm_vcpu *vcpu) | ||||
| @ -328,12 +349,14 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) | ||||
| 		pmu->gp_counters[i].type = KVM_PMC_GP; | ||||
| 		pmu->gp_counters[i].vcpu = vcpu; | ||||
| 		pmu->gp_counters[i].idx = i; | ||||
| 		pmu->gp_counters[i].current_config = 0; | ||||
| 	} | ||||
| 
 | ||||
| 	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { | ||||
| 		pmu->fixed_counters[i].type = KVM_PMC_FIXED; | ||||
| 		pmu->fixed_counters[i].vcpu = vcpu; | ||||
| 		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; | ||||
| 		pmu->fixed_counters[i].current_config = 0; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| @ -366,8 +389,9 @@ struct kvm_pmu_ops intel_pmu_ops = { | ||||
| 	.find_fixed_event = intel_find_fixed_event, | ||||
| 	.pmc_is_enabled = intel_pmc_is_enabled, | ||||
| 	.pmc_idx_to_pmc = intel_pmc_idx_to_pmc, | ||||
| 	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc, | ||||
| 	.msr_idx_to_pmc = intel_msr_idx_to_pmc, | ||||
| 	.is_valid_msr_idx = intel_is_valid_msr_idx, | ||||
| 	.is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx, | ||||
| 	.is_valid_msr = intel_is_valid_msr, | ||||
| 	.get_msr = intel_pmu_get_msr, | ||||
| 	.set_msr = intel_pmu_set_msr, | ||||
|  | ||||
| @ -106,8 +106,6 @@ module_param(enable_apicv, bool, S_IRUGO); | ||||
| static bool __read_mostly nested = 1; | ||||
| module_param(nested, bool, S_IRUGO); | ||||
| 
 | ||||
| static u64 __read_mostly host_xss; | ||||
| 
 | ||||
| bool __read_mostly enable_pml = 1; | ||||
| module_param_named(pml, enable_pml, bool, S_IRUGO); | ||||
| 
 | ||||
| @ -744,8 +742,8 @@ static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, | ||||
| 	bool ret; | ||||
| 	u32 mask = 1 << (seg * SEG_FIELD_NR + field); | ||||
| 
 | ||||
| 	if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { | ||||
| 		vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); | ||||
| 	if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) { | ||||
| 		kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS); | ||||
| 		vmx->segment_cache.bitmask = 0; | ||||
| 	} | ||||
| 	ret = vmx->segment_cache.bitmask & mask; | ||||
| @ -853,7 +851,7 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, | ||||
| 	vm_exit_controls_clearbit(vmx, exit); | ||||
| } | ||||
| 
 | ||||
| static int find_msr(struct vmx_msrs *m, unsigned int msr) | ||||
| int vmx_find_msr_index(struct vmx_msrs *m, u32 msr) | ||||
| { | ||||
| 	unsigned int i; | ||||
| 
 | ||||
| @ -887,7 +885,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | ||||
| 		} | ||||
| 		break; | ||||
| 	} | ||||
| 	i = find_msr(&m->guest, msr); | ||||
| 	i = vmx_find_msr_index(&m->guest, msr); | ||||
| 	if (i < 0) | ||||
| 		goto skip_guest; | ||||
| 	--m->guest.nr; | ||||
| @ -895,7 +893,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | ||||
| 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); | ||||
| 
 | ||||
| skip_guest: | ||||
| 	i = find_msr(&m->host, msr); | ||||
| 	i = vmx_find_msr_index(&m->host, msr); | ||||
| 	if (i < 0) | ||||
| 		return; | ||||
| 
 | ||||
| @ -954,12 +952,12 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | ||||
| 		wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||||
| 	} | ||||
| 
 | ||||
| 	i = find_msr(&m->guest, msr); | ||||
| 	i = vmx_find_msr_index(&m->guest, msr); | ||||
| 	if (!entry_only) | ||||
| 		j = find_msr(&m->host, msr); | ||||
| 		j = vmx_find_msr_index(&m->host, msr); | ||||
| 
 | ||||
| 	if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) || | ||||
| 		(j < 0 &&  m->host.nr == NR_AUTOLOAD_MSRS)) { | ||||
| 	if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) || | ||||
| 		(j < 0 &&  m->host.nr == NR_LOADSTORE_MSRS)) { | ||||
| 		printk_once(KERN_WARNING "Not enough msr switch entries. " | ||||
| 				"Can't add msr %x\n", msr); | ||||
| 		return; | ||||
| @ -1436,35 +1434,44 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | ||||
| 
 | ||||
| unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct vcpu_vmx *vmx = to_vmx(vcpu); | ||||
| 	unsigned long rflags, save_rflags; | ||||
| 
 | ||||
| 	if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { | ||||
| 		__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); | ||||
| 	if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) { | ||||
| 		kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); | ||||
| 		rflags = vmcs_readl(GUEST_RFLAGS); | ||||
| 		if (to_vmx(vcpu)->rmode.vm86_active) { | ||||
| 		if (vmx->rmode.vm86_active) { | ||||
| 			rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; | ||||
| 			save_rflags = to_vmx(vcpu)->rmode.save_rflags; | ||||
| 			save_rflags = vmx->rmode.save_rflags; | ||||
| 			rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | ||||
| 		} | ||||
| 		to_vmx(vcpu)->rflags = rflags; | ||||
| 		vmx->rflags = rflags; | ||||
| 	} | ||||
| 	return to_vmx(vcpu)->rflags; | ||||
| 	return vmx->rflags; | ||||
| } | ||||
| 
 | ||||
| void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||||
| { | ||||
| 	unsigned long old_rflags = vmx_get_rflags(vcpu); | ||||
| 	struct vcpu_vmx *vmx = to_vmx(vcpu); | ||||
| 	unsigned long old_rflags; | ||||
| 
 | ||||
| 	__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); | ||||
| 	to_vmx(vcpu)->rflags = rflags; | ||||
| 	if (to_vmx(vcpu)->rmode.vm86_active) { | ||||
| 		to_vmx(vcpu)->rmode.save_rflags = rflags; | ||||
| 	if (enable_unrestricted_guest) { | ||||
| 		kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); | ||||
| 		vmx->rflags = rflags; | ||||
| 		vmcs_writel(GUEST_RFLAGS, rflags); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	old_rflags = vmx_get_rflags(vcpu); | ||||
| 	vmx->rflags = rflags; | ||||
| 	if (vmx->rmode.vm86_active) { | ||||
| 		vmx->rmode.save_rflags = rflags; | ||||
| 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | ||||
| 	} | ||||
| 	vmcs_writel(GUEST_RFLAGS, rflags); | ||||
| 
 | ||||
| 	if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM) | ||||
| 		to_vmx(vcpu)->emulation_required = emulation_required(vcpu); | ||||
| 	if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM) | ||||
| 		vmx->emulation_required = emulation_required(vcpu); | ||||
| } | ||||
| 
 | ||||
| u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) | ||||
| @ -1852,14 +1859,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||||
| 			return 1; | ||||
| 		return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, | ||||
| 				       &msr_info->data); | ||||
| 	case MSR_IA32_XSS: | ||||
| 		if (!vmx_xsaves_supported() || | ||||
| 		    (!msr_info->host_initiated && | ||||
| 		     !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | ||||
| 		       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) | ||||
| 			return 1; | ||||
| 		msr_info->data = vcpu->arch.ia32_xss; | ||||
| 		break; | ||||
| 	case MSR_IA32_RTIT_CTL: | ||||
| 		if (pt_mode != PT_MODE_HOST_GUEST) | ||||
| 			return 1; | ||||
| @ -2103,25 +2102,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||||
| 		if (!nested_vmx_allowed(vcpu)) | ||||
| 			return 1; | ||||
| 		return vmx_set_vmx_msr(vcpu, msr_index, data); | ||||
| 	case MSR_IA32_XSS: | ||||
| 		if (!vmx_xsaves_supported() || | ||||
| 		    (!msr_info->host_initiated && | ||||
| 		     !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | ||||
| 		       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) | ||||
| 			return 1; | ||||
| 		/*
 | ||||
| 		 * The only supported bit as of Skylake is bit 8, but | ||||
| 		 * it is not supported on KVM. | ||||
| 		 */ | ||||
| 		if (data != 0) | ||||
| 			return 1; | ||||
| 		vcpu->arch.ia32_xss = data; | ||||
| 		if (vcpu->arch.ia32_xss != host_xss) | ||||
| 			add_atomic_switch_msr(vmx, MSR_IA32_XSS, | ||||
| 				vcpu->arch.ia32_xss, host_xss, false); | ||||
| 		else | ||||
| 			clear_atomic_switch_msr(vmx, MSR_IA32_XSS); | ||||
| 		break; | ||||
| 	case MSR_IA32_RTIT_CTL: | ||||
| 		if ((pt_mode != PT_MODE_HOST_GUEST) || | ||||
| 			vmx_rtit_ctl_check(vcpu, data) || | ||||
| @ -2202,7 +2182,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||||
| 
 | ||||
| static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | ||||
| { | ||||
| 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); | ||||
| 	kvm_register_mark_available(vcpu, reg); | ||||
| 
 | ||||
| 	switch (reg) { | ||||
| 	case VCPU_REGS_RSP: | ||||
| 		vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); | ||||
| @ -2214,7 +2195,12 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | ||||
| 		if (enable_ept) | ||||
| 			ept_save_pdptrs(vcpu); | ||||
| 		break; | ||||
| 	case VCPU_EXREG_CR3: | ||||
| 		if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) | ||||
| 			vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||||
| 		break; | ||||
| 	default: | ||||
| 		WARN_ON_ONCE(1); | ||||
| 		break; | ||||
| 	} | ||||
| } | ||||
| @ -2885,13 +2871,6 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||||
| 	vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; | ||||
| } | ||||
| 
 | ||||
| static void vmx_decache_cr3(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) | ||||
| 		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||||
| 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||||
| } | ||||
| 
 | ||||
| static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; | ||||
| @ -2904,8 +2883,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct kvm_mmu *mmu = vcpu->arch.walk_mmu; | ||||
| 
 | ||||
| 	if (!test_bit(VCPU_EXREG_PDPTR, | ||||
| 		      (unsigned long *)&vcpu->arch.regs_dirty)) | ||||
| 	if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR)) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (is_pae_paging(vcpu)) { | ||||
| @ -2927,10 +2905,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) | ||||
| 		mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); | ||||
| 	} | ||||
| 
 | ||||
| 	__set_bit(VCPU_EXREG_PDPTR, | ||||
| 		  (unsigned long *)&vcpu->arch.regs_avail); | ||||
| 	__set_bit(VCPU_EXREG_PDPTR, | ||||
| 		  (unsigned long *)&vcpu->arch.regs_dirty); | ||||
| 	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); | ||||
| } | ||||
| 
 | ||||
| static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | ||||
| @ -2939,8 +2914,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | ||||
| { | ||||
| 	struct vcpu_vmx *vmx = to_vmx(vcpu); | ||||
| 
 | ||||
| 	if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) | ||||
| 		vmx_decache_cr3(vcpu); | ||||
| 	if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) | ||||
| 		vmx_cache_reg(vcpu, VCPU_EXREG_CR3); | ||||
| 	if (!(cr0 & X86_CR0_PG)) { | ||||
| 		/* From paging/starting to nonpaging */ | ||||
| 		exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | | ||||
| @ -3021,6 +2996,7 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) | ||||
| void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | ||||
| { | ||||
| 	struct kvm *kvm = vcpu->kvm; | ||||
| 	bool update_guest_cr3 = true; | ||||
| 	unsigned long guest_cr3; | ||||
| 	u64 eptp; | ||||
| 
 | ||||
| @ -3037,15 +3013,20 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | ||||
| 			spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); | ||||
| 		} | ||||
| 
 | ||||
| 		if (enable_unrestricted_guest || is_paging(vcpu) || | ||||
| 		    is_guest_mode(vcpu)) | ||||
| 			guest_cr3 = kvm_read_cr3(vcpu); | ||||
| 		else | ||||
| 		/* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */ | ||||
| 		if (is_guest_mode(vcpu)) | ||||
| 			update_guest_cr3 = false; | ||||
| 		else if (!enable_unrestricted_guest && !is_paging(vcpu)) | ||||
| 			guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; | ||||
| 		else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) | ||||
| 			guest_cr3 = vcpu->arch.cr3; | ||||
| 		else /* vmcs01.GUEST_CR3 is already up-to-date. */ | ||||
| 			update_guest_cr3 = false; | ||||
| 		ept_load_pdptrs(vcpu); | ||||
| 	} | ||||
| 
 | ||||
| 	vmcs_writel(GUEST_CR3, guest_cr3); | ||||
| 	if (update_guest_cr3) | ||||
| 		vmcs_writel(GUEST_CR3, guest_cr3); | ||||
| } | ||||
| 
 | ||||
| int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | ||||
| @ -3779,7 +3760,7 @@ void pt_update_intercept_for_msr(struct vcpu_vmx *vmx) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) | ||||
| static bool vmx_get_enable_apicv(struct kvm *kvm) | ||||
| { | ||||
| 	return enable_apicv; | ||||
| } | ||||
| @ -4072,6 +4053,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | ||||
| 			guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | ||||
| 			guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); | ||||
| 
 | ||||
| 		vcpu->arch.xsaves_enabled = xsaves_enabled; | ||||
| 
 | ||||
| 		if (!xsaves_enabled) | ||||
| 			exec_control &= ~SECONDARY_EXEC_XSAVES; | ||||
| 
 | ||||
| @ -4184,14 +4167,13 @@ static void ept_set_mmio_spte_mask(void) | ||||
| #define VMX_XSS_EXIT_BITMAP 0 | ||||
| 
 | ||||
| /*
 | ||||
|  * Sets up the vmcs for emulated real mode. | ||||
|  * Noting that the initialization of Guest-state Area of VMCS is in | ||||
|  * vmx_vcpu_reset(). | ||||
|  */ | ||||
| static void vmx_vcpu_setup(struct vcpu_vmx *vmx) | ||||
| static void init_vmcs(struct vcpu_vmx *vmx) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	if (nested) | ||||
| 		nested_vmx_vcpu_setup(); | ||||
| 		nested_vmx_set_vmcs_shadowing_bitmap(); | ||||
| 
 | ||||
| 	if (cpu_has_vmx_msr_bitmap()) | ||||
| 		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); | ||||
| @ -4200,7 +4182,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) | ||||
| 
 | ||||
| 	/* Control */ | ||||
| 	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); | ||||
| 	vmx->hv_deadline_tsc = -1; | ||||
| 
 | ||||
| 	exec_controls_set(vmx, vmx_exec_control(vmx)); | ||||
| 
 | ||||
| @ -4249,34 +4230,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) | ||||
| 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | ||||
| 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); | ||||
| 
 | ||||
| 	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { | ||||
| 		u32 index = vmx_msr_index[i]; | ||||
| 		u32 data_low, data_high; | ||||
| 		int j = vmx->nmsrs; | ||||
| 
 | ||||
| 		if (rdmsr_safe(index, &data_low, &data_high) < 0) | ||||
| 			continue; | ||||
| 		if (wrmsr_safe(index, data_low, data_high) < 0) | ||||
| 			continue; | ||||
| 		vmx->guest_msrs[j].index = i; | ||||
| 		vmx->guest_msrs[j].data = 0; | ||||
| 
 | ||||
| 		switch (index) { | ||||
| 		case MSR_IA32_TSX_CTRL: | ||||
| 			/*
 | ||||
| 			 * No need to pass TSX_CTRL_CPUID_CLEAR through, so | ||||
| 			 * let's avoid changing CPUID bits under the host | ||||
| 			 * kernel's feet. | ||||
| 			 */ | ||||
| 			vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; | ||||
| 			break; | ||||
| 		default: | ||||
| 			vmx->guest_msrs[j].mask = -1ull; | ||||
| 			break; | ||||
| 		} | ||||
| 		++vmx->nmsrs; | ||||
| 	} | ||||
| 
 | ||||
| 	vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); | ||||
| 
 | ||||
| 	/* 22.2.1, 20.8.1 */ | ||||
| @ -4287,6 +4240,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) | ||||
| 
 | ||||
| 	set_cr4_guest_host_mask(vmx); | ||||
| 
 | ||||
| 	if (vmx->vpid != 0) | ||||
| 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | ||||
| 
 | ||||
| 	if (vmx_xsaves_supported()) | ||||
| 		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); | ||||
| 
 | ||||
| @ -4389,9 +4345,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | ||||
| 
 | ||||
| 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); | ||||
| 
 | ||||
| 	if (vmx->vpid != 0) | ||||
| 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | ||||
| 
 | ||||
| 	cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | ||||
| 	vmx->vcpu.arch.cr0 = cr0; | ||||
| 	vmx_set_cr0(vcpu, cr0); /* enter rmode */ | ||||
| @ -4746,7 +4699,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int handle_external_interrupt(struct kvm_vcpu *vcpu) | ||||
| static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	++vcpu->stat.irq_exits; | ||||
| 	return 1; | ||||
| @ -5018,21 +4971,6 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | ||||
| 	vmcs_writel(GUEST_DR7, val); | ||||
| } | ||||
| 
 | ||||
| static int handle_cpuid(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	return kvm_emulate_cpuid(vcpu); | ||||
| } | ||||
| 
 | ||||
| static int handle_rdmsr(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	return kvm_emulate_rdmsr(vcpu); | ||||
| } | ||||
| 
 | ||||
| static int handle_wrmsr(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	return kvm_emulate_wrmsr(vcpu); | ||||
| } | ||||
| 
 | ||||
| static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	kvm_apic_update_ppr(vcpu); | ||||
| @ -5049,11 +4987,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) | ||||
| 	return 1; | ||||
| } | ||||
| 
 | ||||
| static int handle_halt(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	return kvm_emulate_halt(vcpu); | ||||
| } | ||||
| 
 | ||||
| static int handle_vmcall(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	return kvm_emulate_hypercall(vcpu); | ||||
| @ -5601,11 +5534,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | ||||
| 	[EXIT_REASON_IO_INSTRUCTION]          = handle_io, | ||||
| 	[EXIT_REASON_CR_ACCESS]               = handle_cr, | ||||
| 	[EXIT_REASON_DR_ACCESS]               = handle_dr, | ||||
| 	[EXIT_REASON_CPUID]                   = handle_cpuid, | ||||
| 	[EXIT_REASON_MSR_READ]                = handle_rdmsr, | ||||
| 	[EXIT_REASON_MSR_WRITE]               = handle_wrmsr, | ||||
| 	[EXIT_REASON_CPUID]                   = kvm_emulate_cpuid, | ||||
| 	[EXIT_REASON_MSR_READ]                = kvm_emulate_rdmsr, | ||||
| 	[EXIT_REASON_MSR_WRITE]               = kvm_emulate_wrmsr, | ||||
| 	[EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window, | ||||
| 	[EXIT_REASON_HLT]                     = handle_halt, | ||||
| 	[EXIT_REASON_HLT]                     = kvm_emulate_halt, | ||||
| 	[EXIT_REASON_INVD]		      = handle_invd, | ||||
| 	[EXIT_REASON_INVLPG]		      = handle_invlpg, | ||||
| 	[EXIT_REASON_RDPMC]                   = handle_rdpmc, | ||||
| @ -5978,9 +5911,23 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | ||||
| 	} | ||||
| 
 | ||||
| 	if (exit_reason < kvm_vmx_max_exit_handlers | ||||
| 	    && kvm_vmx_exit_handlers[exit_reason]) | ||||
| 	    && kvm_vmx_exit_handlers[exit_reason]) { | ||||
| #ifdef CONFIG_RETPOLINE | ||||
| 		if (exit_reason == EXIT_REASON_MSR_WRITE) | ||||
| 			return kvm_emulate_wrmsr(vcpu); | ||||
| 		else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) | ||||
| 			return handle_preemption_timer(vcpu); | ||||
| 		else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT) | ||||
| 			return handle_interrupt_window(vcpu); | ||||
| 		else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) | ||||
| 			return handle_external_interrupt(vcpu); | ||||
| 		else if (exit_reason == EXIT_REASON_HLT) | ||||
| 			return kvm_emulate_halt(vcpu); | ||||
| 		else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) | ||||
| 			return handle_ept_misconfig(vcpu); | ||||
| #endif | ||||
| 		return kvm_vmx_exit_handlers[exit_reason](vcpu); | ||||
| 	else { | ||||
| 	} else { | ||||
| 		vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", | ||||
| 				exit_reason); | ||||
| 		dump_vmcs(); | ||||
| @ -6066,17 +6013,17 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu) | ||||
| static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | ||||
| { | ||||
| 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||||
| 	int tpr_threshold; | ||||
| 
 | ||||
| 	if (is_guest_mode(vcpu) && | ||||
| 		nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (irr == -1 || tpr < irr) { | ||||
| 		vmcs_write32(TPR_THRESHOLD, 0); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	vmcs_write32(TPR_THRESHOLD, irr); | ||||
| 	tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr; | ||||
| 	if (is_guest_mode(vcpu)) | ||||
| 		to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; | ||||
| 	else | ||||
| 		vmcs_write32(TPR_THRESHOLD, tpr_threshold); | ||||
| } | ||||
| 
 | ||||
| void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) | ||||
| @ -6553,9 +6500,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | ||||
| 	if (vmx->nested.need_vmcs12_to_shadow_sync) | ||||
| 		nested_sync_vmcs12_to_shadow(vcpu); | ||||
| 
 | ||||
| 	if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | ||||
| 	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) | ||||
| 		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | ||||
| 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) | ||||
| 	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) | ||||
| 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); | ||||
| 
 | ||||
| 	cr3 = __get_current_cr3_fast(); | ||||
| @ -6578,7 +6525,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | ||||
| 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||||
| 		vmx_set_interrupt_shadow(vcpu, 0); | ||||
| 
 | ||||
| 	kvm_load_guest_xcr0(vcpu); | ||||
| 	kvm_load_guest_xsave_state(vcpu); | ||||
| 
 | ||||
| 	if (static_cpu_has(X86_FEATURE_PKU) && | ||||
| 	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && | ||||
| @ -6685,7 +6632,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | ||||
| 			__write_pkru(vmx->host_pkru); | ||||
| 	} | ||||
| 
 | ||||
| 	kvm_put_guest_xcr0(vcpu); | ||||
| 	kvm_load_host_xsave_state(vcpu); | ||||
| 
 | ||||
| 	vmx->nested.nested_run_pending = 0; | ||||
| 	vmx->idt_vectoring_info = 0; | ||||
| @ -6727,7 +6674,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | ||||
| 	free_vpid(vmx->vpid); | ||||
| 	nested_vmx_free_vcpu(vcpu); | ||||
| 	free_loaded_vmcs(vmx->loaded_vmcs); | ||||
| 	kfree(vmx->guest_msrs); | ||||
| 	kvm_vcpu_uninit(vcpu); | ||||
| 	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); | ||||
| 	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); | ||||
| @ -6739,7 +6685,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | ||||
| 	int err; | ||||
| 	struct vcpu_vmx *vmx; | ||||
| 	unsigned long *msr_bitmap; | ||||
| 	int cpu; | ||||
| 	int i, cpu; | ||||
| 
 | ||||
| 	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, | ||||
| 		"struct kvm_vcpu must be at offset 0 for arch usercopy region"); | ||||
| @ -6784,16 +6730,39 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | ||||
| 			goto uninit_vcpu; | ||||
| 	} | ||||
| 
 | ||||
| 	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); | ||||
| 	BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) | ||||
| 		     > PAGE_SIZE); | ||||
| 	BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS); | ||||
| 
 | ||||
| 	if (!vmx->guest_msrs) | ||||
| 		goto free_pml; | ||||
| 	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { | ||||
| 		u32 index = vmx_msr_index[i]; | ||||
| 		u32 data_low, data_high; | ||||
| 		int j = vmx->nmsrs; | ||||
| 
 | ||||
| 		if (rdmsr_safe(index, &data_low, &data_high) < 0) | ||||
| 			continue; | ||||
| 		if (wrmsr_safe(index, data_low, data_high) < 0) | ||||
| 			continue; | ||||
| 
 | ||||
| 		vmx->guest_msrs[j].index = i; | ||||
| 		vmx->guest_msrs[j].data = 0; | ||||
| 		switch (index) { | ||||
| 		case MSR_IA32_TSX_CTRL: | ||||
| 			/*
 | ||||
| 			 * No need to pass TSX_CTRL_CPUID_CLEAR through, so | ||||
| 			 * let's avoid changing CPUID bits under the host | ||||
| 			 * kernel's feet. | ||||
| 			 */ | ||||
| 			vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; | ||||
| 			break; | ||||
| 		default: | ||||
| 			vmx->guest_msrs[j].mask = -1ull; | ||||
| 			break; | ||||
| 		} | ||||
| 		++vmx->nmsrs; | ||||
| 	} | ||||
| 
 | ||||
| 	err = alloc_loaded_vmcs(&vmx->vmcs01); | ||||
| 	if (err < 0) | ||||
| 		goto free_msrs; | ||||
| 		goto free_pml; | ||||
| 
 | ||||
| 	msr_bitmap = vmx->vmcs01.msr_bitmap; | ||||
| 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R); | ||||
| @ -6815,7 +6784,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | ||||
| 	cpu = get_cpu(); | ||||
| 	vmx_vcpu_load(&vmx->vcpu, cpu); | ||||
| 	vmx->vcpu.cpu = cpu; | ||||
| 	vmx_vcpu_setup(vmx); | ||||
| 	init_vmcs(vmx); | ||||
| 	vmx_vcpu_put(&vmx->vcpu); | ||||
| 	put_cpu(); | ||||
| 	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { | ||||
| @ -6855,8 +6824,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | ||||
| 
 | ||||
| free_vmcs: | ||||
| 	free_loaded_vmcs(vmx->loaded_vmcs); | ||||
| free_msrs: | ||||
| 	kfree(vmx->guest_msrs); | ||||
| free_pml: | ||||
| 	vmx_destroy_pml_buffer(vmx); | ||||
| uninit_vcpu: | ||||
| @ -7035,6 +7002,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) | ||||
| 	cr4_fixed1_update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP)); | ||||
| 	cr4_fixed1_update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU)); | ||||
| 	cr4_fixed1_update(X86_CR4_UMIP,       ecx, bit(X86_FEATURE_UMIP)); | ||||
| 	cr4_fixed1_update(X86_CR4_LA57,       ecx, bit(X86_FEATURE_LA57)); | ||||
| 
 | ||||
| #undef cr4_fixed1_update | ||||
| } | ||||
| @ -7129,6 +7097,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct vcpu_vmx *vmx = to_vmx(vcpu); | ||||
| 
 | ||||
| 	/* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ | ||||
| 	vcpu->arch.xsaves_enabled = false; | ||||
| 
 | ||||
| 	if (cpu_has_secondary_exec_ctrls()) { | ||||
| 		vmx_compute_secondary_exec_control(vmx); | ||||
| 		vmcs_set_secondary_exec_control(vmx); | ||||
| @ -7136,10 +7107,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | ||||
| 
 | ||||
| 	if (nested_vmx_allowed(vcpu)) | ||||
| 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= | ||||
| 			FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | | ||||
| 			FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | ||||
| 	else | ||||
| 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= | ||||
| 			~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | ||||
| 			~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | | ||||
| 			  FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); | ||||
| 
 | ||||
| 	if (nested_vmx_allowed(vcpu)) { | ||||
| 		nested_vmx_cr_fixed1_bits_update(vcpu); | ||||
| @ -7646,9 +7619,6 @@ static __init int hardware_setup(void) | ||||
| 		WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); | ||||
| 	} | ||||
| 
 | ||||
| 	if (boot_cpu_has(X86_FEATURE_XSAVES)) | ||||
| 		rdmsrl(MSR_IA32_XSS, host_xss); | ||||
| 
 | ||||
| 	if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || | ||||
| 	    !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) | ||||
| 		enable_vpid = 0; | ||||
| @ -7829,7 +7799,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | ||||
| 	.get_cpl = vmx_get_cpl, | ||||
| 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits, | ||||
| 	.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, | ||||
| 	.decache_cr3 = vmx_decache_cr3, | ||||
| 	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | ||||
| 	.set_cr0 = vmx_set_cr0, | ||||
| 	.set_cr3 = vmx_set_cr3, | ||||
|  | ||||
| @ -22,11 +22,17 @@ extern u32 get_umwait_control_msr(void); | ||||
| 
 | ||||
| #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) | ||||
| 
 | ||||
| #define NR_AUTOLOAD_MSRS 8 | ||||
| #ifdef CONFIG_X86_64 | ||||
| #define NR_SHARED_MSRS	7 | ||||
| #else | ||||
| #define NR_SHARED_MSRS	4 | ||||
| #endif | ||||
| 
 | ||||
| #define NR_LOADSTORE_MSRS 8 | ||||
| 
 | ||||
| struct vmx_msrs { | ||||
| 	unsigned int		nr; | ||||
| 	struct vmx_msr_entry	val[NR_AUTOLOAD_MSRS]; | ||||
| 	struct vmx_msr_entry	val[NR_LOADSTORE_MSRS]; | ||||
| }; | ||||
| 
 | ||||
| struct shared_msr_entry { | ||||
| @ -167,6 +173,9 @@ struct nested_vmx { | ||||
| 	u64 vmcs01_debugctl; | ||||
| 	u64 vmcs01_guest_bndcfgs; | ||||
| 
 | ||||
| 	/* to migrate it to L1 if L2 writes to L1's CR8 directly */ | ||||
| 	int l1_tpr_threshold; | ||||
| 
 | ||||
| 	u16 vpid02; | ||||
| 	u16 last_vpid; | ||||
| 
 | ||||
| @ -203,7 +212,7 @@ struct vcpu_vmx { | ||||
| 	u32                   idt_vectoring_info; | ||||
| 	ulong                 rflags; | ||||
| 
 | ||||
| 	struct shared_msr_entry *guest_msrs; | ||||
| 	struct shared_msr_entry guest_msrs[NR_SHARED_MSRS]; | ||||
| 	int                   nmsrs; | ||||
| 	int                   save_nmsrs; | ||||
| 	bool                  guest_msrs_ready; | ||||
| @ -230,6 +239,10 @@ struct vcpu_vmx { | ||||
| 		struct vmx_msrs host; | ||||
| 	} msr_autoload; | ||||
| 
 | ||||
| 	struct msr_autostore { | ||||
| 		struct vmx_msrs guest; | ||||
| 	} msr_autostore; | ||||
| 
 | ||||
| 	struct { | ||||
| 		int vm86_active; | ||||
| 		ulong save_rflags; | ||||
| @ -334,6 +347,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); | ||||
| struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); | ||||
| void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); | ||||
| void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); | ||||
| int vmx_find_msr_index(struct vmx_msrs *m, u32 msr); | ||||
| 
 | ||||
| #define POSTED_INTR_ON  0 | ||||
| #define POSTED_INTR_SN  1 | ||||
|  | ||||
| @ -176,6 +176,8 @@ struct kvm_shared_msrs { | ||||
| static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | ||||
| static struct kvm_shared_msrs __percpu *shared_msrs; | ||||
| 
 | ||||
| static u64 __read_mostly host_xss; | ||||
| 
 | ||||
| struct kvm_stats_debugfs_item debugfs_entries[] = { | ||||
| 	{ "pf_fixed", VCPU_STAT(pf_fixed) }, | ||||
| 	{ "pf_guest", VCPU_STAT(pf_guest) }, | ||||
| @ -260,23 +262,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void shared_msr_update(unsigned slot, u32 msr) | ||||
| { | ||||
| 	u64 value; | ||||
| 	unsigned int cpu = smp_processor_id(); | ||||
| 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); | ||||
| 
 | ||||
| 	/* only read, and nobody should modify it at this time,
 | ||||
| 	 * so don't need lock */ | ||||
| 	if (slot >= shared_msrs_global.nr) { | ||||
| 		printk(KERN_ERR "kvm: invalid MSR slot!"); | ||||
| 		return; | ||||
| 	} | ||||
| 	rdmsrl_safe(msr, &value); | ||||
| 	smsr->values[slot].host = value; | ||||
| 	smsr->values[slot].curr = value; | ||||
| } | ||||
| 
 | ||||
| void kvm_define_shared_msr(unsigned slot, u32 msr) | ||||
| { | ||||
| 	BUG_ON(slot >= KVM_NR_SHARED_MSRS); | ||||
| @ -288,10 +273,16 @@ EXPORT_SYMBOL_GPL(kvm_define_shared_msr); | ||||
| 
 | ||||
| static void kvm_shared_msr_cpu_online(void) | ||||
| { | ||||
| 	unsigned i; | ||||
| 	unsigned int cpu = smp_processor_id(); | ||||
| 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); | ||||
| 	u64 value; | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < shared_msrs_global.nr; ++i) | ||||
| 		shared_msr_update(i, shared_msrs_global.msrs[i]); | ||||
| 	for (i = 0; i < shared_msrs_global.nr; ++i) { | ||||
| 		rdmsrl_safe(shared_msrs_global.msrs[i], &value); | ||||
| 		smsr->values[i].host = value; | ||||
| 		smsr->values[i].curr = value; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | ||||
| @ -710,10 +701,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) | ||||
| 	ret = 1; | ||||
| 
 | ||||
| 	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); | ||||
| 	__set_bit(VCPU_EXREG_PDPTR, | ||||
| 		  (unsigned long *)&vcpu->arch.regs_avail); | ||||
| 	__set_bit(VCPU_EXREG_PDPTR, | ||||
| 		  (unsigned long *)&vcpu->arch.regs_dirty); | ||||
| 	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); | ||||
| 
 | ||||
| out: | ||||
| 
 | ||||
| 	return ret; | ||||
| @ -723,7 +712,6 @@ EXPORT_SYMBOL_GPL(load_pdptrs); | ||||
| bool pdptrs_changed(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)]; | ||||
| 	bool changed = true; | ||||
| 	int offset; | ||||
| 	gfn_t gfn; | ||||
| 	int r; | ||||
| @ -731,8 +719,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) | ||||
| 	if (!is_pae_paging(vcpu)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	if (!test_bit(VCPU_EXREG_PDPTR, | ||||
| 		      (unsigned long *)&vcpu->arch.regs_avail)) | ||||
| 	if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) | ||||
| 		return true; | ||||
| 
 | ||||
| 	gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT; | ||||
| @ -740,11 +727,9 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) | ||||
| 	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), | ||||
| 				       PFERR_USER_MASK | PFERR_WRITE_MASK); | ||||
| 	if (r < 0) | ||||
| 		goto out; | ||||
| 	changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; | ||||
| out: | ||||
| 		return true; | ||||
| 
 | ||||
| 	return changed; | ||||
| 	return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(pdptrs_changed); | ||||
| 
 | ||||
| @ -813,27 +798,34 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_lmsw); | ||||
| 
 | ||||
| void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) | ||||
| void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && | ||||
| 			!vcpu->guest_xcr0_loaded) { | ||||
| 		/* kvm_set_xcr() also depends on this */ | ||||
| 	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { | ||||
| 
 | ||||
| 		if (vcpu->arch.xcr0 != host_xcr0) | ||||
| 			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); | ||||
| 		vcpu->guest_xcr0_loaded = 1; | ||||
| 
 | ||||
| 		if (vcpu->arch.xsaves_enabled && | ||||
| 		    vcpu->arch.ia32_xss != host_xss) | ||||
| 			wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); | ||||
| 	} | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); | ||||
| EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); | ||||
| 
 | ||||
| void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | ||||
| void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (vcpu->guest_xcr0_loaded) { | ||||
| 	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { | ||||
| 
 | ||||
| 		if (vcpu->arch.xcr0 != host_xcr0) | ||||
| 			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); | ||||
| 		vcpu->guest_xcr0_loaded = 0; | ||||
| 
 | ||||
| 		if (vcpu->arch.xsaves_enabled && | ||||
| 		    vcpu->arch.ia32_xss != host_xss) | ||||
| 			wrmsrl(MSR_IA32_XSS, host_xss); | ||||
| 	} | ||||
| 
 | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); | ||||
| EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); | ||||
| 
 | ||||
| static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | ||||
| { | ||||
| @ -985,7 +977,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | ||||
| 
 | ||||
| 	kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); | ||||
| 	vcpu->arch.cr3 = cr3; | ||||
| 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||||
| 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| @ -1326,7 +1318,6 @@ static u64 kvm_get_arch_capabilities(void) | ||||
| 
 | ||||
| 	return data; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); | ||||
| 
 | ||||
| static int kvm_get_msr_feature(struct kvm_msr_entry *msr) | ||||
| { | ||||
| @ -1547,20 +1538,25 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_X86_64 | ||||
| struct pvclock_clock { | ||||
| 	int vclock_mode; | ||||
| 	u64 cycle_last; | ||||
| 	u64 mask; | ||||
| 	u32 mult; | ||||
| 	u32 shift; | ||||
| }; | ||||
| 
 | ||||
| struct pvclock_gtod_data { | ||||
| 	seqcount_t	seq; | ||||
| 
 | ||||
| 	struct { /* extract of a clocksource struct */ | ||||
| 		int vclock_mode; | ||||
| 		u64	cycle_last; | ||||
| 		u64	mask; | ||||
| 		u32	mult; | ||||
| 		u32	shift; | ||||
| 	} clock; | ||||
| 	struct pvclock_clock clock; /* extract of a clocksource struct */ | ||||
| 	struct pvclock_clock raw_clock; /* extract of a clocksource struct */ | ||||
| 
 | ||||
| 	u64		boot_ns_raw; | ||||
| 	u64		boot_ns; | ||||
| 	u64		nsec_base; | ||||
| 	u64		wall_time_sec; | ||||
| 	u64		monotonic_raw_nsec; | ||||
| }; | ||||
| 
 | ||||
| static struct pvclock_gtod_data pvclock_gtod_data; | ||||
| @ -1568,9 +1564,10 @@ static struct pvclock_gtod_data pvclock_gtod_data; | ||||
| static void update_pvclock_gtod(struct timekeeper *tk) | ||||
| { | ||||
| 	struct pvclock_gtod_data *vdata = &pvclock_gtod_data; | ||||
| 	u64 boot_ns; | ||||
| 	u64 boot_ns, boot_ns_raw; | ||||
| 
 | ||||
| 	boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); | ||||
| 	boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot)); | ||||
| 
 | ||||
| 	write_seqcount_begin(&vdata->seq); | ||||
| 
 | ||||
| @ -1581,11 +1578,20 @@ static void update_pvclock_gtod(struct timekeeper *tk) | ||||
| 	vdata->clock.mult		= tk->tkr_mono.mult; | ||||
| 	vdata->clock.shift		= tk->tkr_mono.shift; | ||||
| 
 | ||||
| 	vdata->raw_clock.vclock_mode	= tk->tkr_raw.clock->archdata.vclock_mode; | ||||
| 	vdata->raw_clock.cycle_last	= tk->tkr_raw.cycle_last; | ||||
| 	vdata->raw_clock.mask		= tk->tkr_raw.mask; | ||||
| 	vdata->raw_clock.mult		= tk->tkr_raw.mult; | ||||
| 	vdata->raw_clock.shift		= tk->tkr_raw.shift; | ||||
| 
 | ||||
| 	vdata->boot_ns			= boot_ns; | ||||
| 	vdata->nsec_base		= tk->tkr_mono.xtime_nsec; | ||||
| 
 | ||||
| 	vdata->wall_time_sec            = tk->xtime_sec; | ||||
| 
 | ||||
| 	vdata->boot_ns_raw		= boot_ns_raw; | ||||
| 	vdata->monotonic_raw_nsec	= tk->tkr_raw.xtime_nsec; | ||||
| 
 | ||||
| 	write_seqcount_end(&vdata->seq); | ||||
| } | ||||
| #endif | ||||
| @ -2009,21 +2015,21 @@ static u64 read_tsc(void) | ||||
| 	return last; | ||||
| } | ||||
| 
 | ||||
| static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) | ||||
| static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp, | ||||
| 			  int *mode) | ||||
| { | ||||
| 	long v; | ||||
| 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data; | ||||
| 	u64 tsc_pg_val; | ||||
| 
 | ||||
| 	switch (gtod->clock.vclock_mode) { | ||||
| 	switch (clock->vclock_mode) { | ||||
| 	case VCLOCK_HVCLOCK: | ||||
| 		tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), | ||||
| 						  tsc_timestamp); | ||||
| 		if (tsc_pg_val != U64_MAX) { | ||||
| 			/* TSC page valid */ | ||||
| 			*mode = VCLOCK_HVCLOCK; | ||||
| 			v = (tsc_pg_val - gtod->clock.cycle_last) & | ||||
| 				gtod->clock.mask; | ||||
| 			v = (tsc_pg_val - clock->cycle_last) & | ||||
| 				clock->mask; | ||||
| 		} else { | ||||
| 			/* TSC page invalid */ | ||||
| 			*mode = VCLOCK_NONE; | ||||
| @ -2032,8 +2038,8 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) | ||||
| 	case VCLOCK_TSC: | ||||
| 		*mode = VCLOCK_TSC; | ||||
| 		*tsc_timestamp = read_tsc(); | ||||
| 		v = (*tsc_timestamp - gtod->clock.cycle_last) & | ||||
| 			gtod->clock.mask; | ||||
| 		v = (*tsc_timestamp - clock->cycle_last) & | ||||
| 			clock->mask; | ||||
| 		break; | ||||
| 	default: | ||||
| 		*mode = VCLOCK_NONE; | ||||
| @ -2042,10 +2048,10 @@ static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) | ||||
| 	if (*mode == VCLOCK_NONE) | ||||
| 		*tsc_timestamp = v = 0; | ||||
| 
 | ||||
| 	return v * gtod->clock.mult; | ||||
| 	return v * clock->mult; | ||||
| } | ||||
| 
 | ||||
| static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) | ||||
| static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp) | ||||
| { | ||||
| 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data; | ||||
| 	unsigned long seq; | ||||
| @ -2054,10 +2060,10 @@ static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) | ||||
| 
 | ||||
| 	do { | ||||
| 		seq = read_seqcount_begin(&gtod->seq); | ||||
| 		ns = gtod->nsec_base; | ||||
| 		ns += vgettsc(tsc_timestamp, &mode); | ||||
| 		ns = gtod->monotonic_raw_nsec; | ||||
| 		ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode); | ||||
| 		ns >>= gtod->clock.shift; | ||||
| 		ns += gtod->boot_ns; | ||||
| 		ns += gtod->boot_ns_raw; | ||||
| 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | ||||
| 	*t = ns; | ||||
| 
 | ||||
| @ -2075,7 +2081,7 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp) | ||||
| 		seq = read_seqcount_begin(&gtod->seq); | ||||
| 		ts->tv_sec = gtod->wall_time_sec; | ||||
| 		ns = gtod->nsec_base; | ||||
| 		ns += vgettsc(tsc_timestamp, &mode); | ||||
| 		ns += vgettsc(&gtod->clock, tsc_timestamp, &mode); | ||||
| 		ns >>= gtod->clock.shift; | ||||
| 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | ||||
| 
 | ||||
| @ -2092,7 +2098,7 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp) | ||||
| 	if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns, | ||||
| 	return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns, | ||||
| 						      tsc_timestamp)); | ||||
| } | ||||
| 
 | ||||
| @ -2715,6 +2721,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||||
| 	case MSR_IA32_TSC: | ||||
| 		kvm_write_tsc(vcpu, msr_info); | ||||
| 		break; | ||||
| 	case MSR_IA32_XSS: | ||||
| 		if (!msr_info->host_initiated && | ||||
| 		    !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) | ||||
| 			return 1; | ||||
| 		/*
 | ||||
| 		 * We do support PT if kvm_x86_ops->pt_supported(), but we do | ||||
| 		 * not support IA32_XSS[bit 8]. Guests will have to use | ||||
| 		 * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT | ||||
| 		 * MSRs. | ||||
| 		 */ | ||||
| 		if (data != 0) | ||||
| 			return 1; | ||||
| 		vcpu->arch.ia32_xss = data; | ||||
| 		break; | ||||
| 	case MSR_SMI_COUNT: | ||||
| 		if (!msr_info->host_initiated) | ||||
| 			return 1; | ||||
| @ -3042,6 +3062,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||||
| 	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: | ||||
| 		return get_msr_mce(vcpu, msr_info->index, &msr_info->data, | ||||
| 				   msr_info->host_initiated); | ||||
| 	case MSR_IA32_XSS: | ||||
| 		if (!msr_info->host_initiated && | ||||
| 		    !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) | ||||
| 			return 1; | ||||
| 		msr_info->data = vcpu->arch.ia32_xss; | ||||
| 		break; | ||||
| 	case MSR_K7_CLK_CTL: | ||||
| 		/*
 | ||||
| 		 * Provide expected ramp-up count for K7. All other | ||||
| @ -3819,12 +3845,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | ||||
| 				vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; | ||||
| 			else | ||||
| 				vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; | ||||
| 			if (lapic_in_kernel(vcpu)) { | ||||
| 				if (events->smi.latched_init) | ||||
| 					set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | ||||
| 				else | ||||
| 					clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if (lapic_in_kernel(vcpu)) { | ||||
| 			if (events->smi.latched_init) | ||||
| 				set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | ||||
| 			else | ||||
| 				clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| @ -4415,6 +4442,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | ||||
| 	case KVM_SET_NESTED_STATE: { | ||||
| 		struct kvm_nested_state __user *user_kvm_nested_state = argp; | ||||
| 		struct kvm_nested_state kvm_state; | ||||
| 		int idx; | ||||
| 
 | ||||
| 		r = -EINVAL; | ||||
| 		if (!kvm_x86_ops->set_nested_state) | ||||
| @ -4438,7 +4466,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | ||||
| 		    && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE)) | ||||
| 			break; | ||||
| 
 | ||||
| 		idx = srcu_read_lock(&vcpu->kvm->srcu); | ||||
| 		r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); | ||||
| 		srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||||
| 		break; | ||||
| 	} | ||||
| 	case KVM_GET_SUPPORTED_HV_CPUID: { | ||||
| @ -4940,9 +4970,6 @@ set_identity_unlock: | ||||
| 		if (!irqchip_kernel(kvm)) | ||||
| 			goto set_irqchip_out; | ||||
| 		r = kvm_vm_ioctl_set_irqchip(kvm, chip); | ||||
| 		if (r) | ||||
| 			goto set_irqchip_out; | ||||
| 		r = 0; | ||||
| 	set_irqchip_out: | ||||
| 		kfree(chip); | ||||
| 		break; | ||||
| @ -6130,7 +6157,7 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) | ||||
| static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, | ||||
| 			      u32 pmc) | ||||
| { | ||||
| 	return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc); | ||||
| 	return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc); | ||||
| } | ||||
| 
 | ||||
| static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, | ||||
| @ -7860,6 +7887,19 @@ static void process_smi(struct kvm_vcpu *vcpu) | ||||
| 	kvm_make_request(KVM_REQ_EVENT, vcpu); | ||||
| } | ||||
| 
 | ||||
| void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, | ||||
| 				       unsigned long *vcpu_bitmap) | ||||
| { | ||||
| 	cpumask_var_t cpus; | ||||
| 
 | ||||
| 	zalloc_cpumask_var(&cpus, GFP_ATOMIC); | ||||
| 
 | ||||
| 	kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, | ||||
| 				    vcpu_bitmap, cpus); | ||||
| 
 | ||||
| 	free_cpumask_var(cpus); | ||||
| } | ||||
| 
 | ||||
| void kvm_make_scan_ioapic_request(struct kvm *kvm) | ||||
| { | ||||
| 	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); | ||||
| @ -7937,7 +7977,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) | ||||
| 	 */ | ||||
| 	put_page(page); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); | ||||
| 
 | ||||
| void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| @ -8696,8 +8735,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | ||||
| 	    mp_state->mp_state != KVM_MP_STATE_RUNNABLE) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	/* INITs are latched while in SMM */ | ||||
| 	if ((is_smm(vcpu) || vcpu->arch.smi_pending) && | ||||
| 	/*
 | ||||
| 	 * KVM_MP_STATE_INIT_RECEIVED means the processor is in | ||||
| 	 * INIT state; latched init should be reported using | ||||
| 	 * KVM_SET_VCPU_EVENTS, so reject it here. | ||||
| 	 */ | ||||
| 	if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) && | ||||
| 	    (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || | ||||
| 	     mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) | ||||
| 		goto out; | ||||
| @ -8789,7 +8832,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | ||||
| 	vcpu->arch.cr2 = sregs->cr2; | ||||
| 	mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; | ||||
| 	vcpu->arch.cr3 = sregs->cr3; | ||||
| 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||||
| 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); | ||||
| 
 | ||||
| 	kvm_set_cr8(vcpu, sregs->cr8); | ||||
| 
 | ||||
| @ -9316,6 +9359,9 @@ int kvm_arch_hardware_setup(void) | ||||
| 		kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; | ||||
| 	} | ||||
| 
 | ||||
| 	if (boot_cpu_has(X86_FEATURE_XSAVES)) | ||||
| 		rdmsrl(MSR_IA32_XSS, host_xss); | ||||
| 
 | ||||
| 	kvm_init_msr_list(); | ||||
| 	return 0; | ||||
| } | ||||
| @ -9369,7 +9415,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | ||||
| 		goto fail_free_pio_data; | ||||
| 
 | ||||
| 	if (irqchip_in_kernel(vcpu->kvm)) { | ||||
| 		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); | ||||
| 		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm); | ||||
| 		r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); | ||||
| 		if (r < 0) | ||||
| 			goto fail_mmu_destroy; | ||||
| @ -9438,7 +9484,13 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||||
| 
 | ||||
| void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) | ||||
| { | ||||
| 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | ||||
| 
 | ||||
| 	vcpu->arch.l1tf_flush_l1d = true; | ||||
| 	if (pmu->version && unlikely(pmu->event_count)) { | ||||
| 		pmu->need_cleanup = true; | ||||
| 		kvm_make_request(KVM_REQ_PMU, vcpu); | ||||
| 	} | ||||
| 	kvm_x86_ops->sched_in(vcpu, cpu); | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -238,8 +238,7 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, | ||||
| 					       enum kvm_reg reg) | ||||
| static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, int reg) | ||||
| { | ||||
| 	unsigned long val = kvm_register_read(vcpu, reg); | ||||
| 
 | ||||
| @ -247,8 +246,7 @@ static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, | ||||
| } | ||||
| 
 | ||||
| static inline void kvm_register_writel(struct kvm_vcpu *vcpu, | ||||
| 				       enum kvm_reg reg, | ||||
| 				       unsigned long val) | ||||
| 				       int reg, unsigned long val) | ||||
| { | ||||
| 	if (!is_64_bit_mode(vcpu)) | ||||
| 		val = (u32)val; | ||||
| @ -260,6 +258,11 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) | ||||
| 	return !(kvm->arch.disabled_quirks & quirk); | ||||
| } | ||||
| 
 | ||||
| static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	return is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu); | ||||
| } | ||||
| 
 | ||||
| void kvm_set_pending_timer(struct kvm_vcpu *vcpu); | ||||
| void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); | ||||
| 
 | ||||
| @ -366,7 +369,7 @@ static inline bool kvm_pat_valid(u64 data) | ||||
| 	return (data | ((data & 0x0202020202020202ull) << 1)) == data; | ||||
| } | ||||
| 
 | ||||
| void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); | ||||
| void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); | ||||
| void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); | ||||
| void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); | ||||
| 
 | ||||
| #endif | ||||
|  | ||||
| @ -21,6 +21,8 @@ | ||||
| #include <linux/ccp.h> | ||||
| #include <linux/firmware.h> | ||||
| 
 | ||||
| #include <asm/smp.h> | ||||
| 
 | ||||
| #include "sp-dev.h" | ||||
| #include "psp-dev.h" | ||||
| 
 | ||||
| @ -235,6 +237,13 @@ static int __sev_platform_init_locked(int *error) | ||||
| 		return rc; | ||||
| 
 | ||||
| 	psp->sev_state = SEV_STATE_INIT; | ||||
| 
 | ||||
| 	/* Prepare for first SEV guest launch after INIT */ | ||||
| 	wbinvd_on_all_cpus(); | ||||
| 	rc = __sev_do_cmd_locked(SEV_CMD_DF_FLUSH, NULL, error); | ||||
| 	if (rc) | ||||
| 		return rc; | ||||
| 
 | ||||
| 	dev_dbg(psp->dev, "SEV firmware initialized\n"); | ||||
| 
 | ||||
| 	return rc; | ||||
|  | ||||
| @ -266,7 +266,8 @@ struct kvm_vcpu { | ||||
| 	struct preempt_notifier preempt_notifier; | ||||
| #endif | ||||
| 	int cpu; | ||||
| 	int vcpu_id; | ||||
| 	int vcpu_id; /* id given by userspace at creation */ | ||||
| 	int vcpu_idx; /* index in kvm->vcpus array */ | ||||
| 	int srcu_idx; | ||||
| 	int mode; | ||||
| 	u64 requests; | ||||
| @ -278,7 +279,6 @@ struct kvm_vcpu { | ||||
| 	struct mutex mutex; | ||||
| 	struct kvm_run *run; | ||||
| 
 | ||||
| 	int guest_xcr0_loaded; | ||||
| 	struct swait_queue_head wq; | ||||
| 	struct pid __rcu *pid; | ||||
| 	int sigset_active; | ||||
| @ -571,13 +571,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) | ||||
| 
 | ||||
| static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	struct kvm_vcpu *tmp; | ||||
| 	int idx; | ||||
| 
 | ||||
| 	kvm_for_each_vcpu(idx, tmp, vcpu->kvm) | ||||
| 		if (tmp == vcpu) | ||||
| 			return idx; | ||||
| 	BUG(); | ||||
| 	return vcpu->vcpu_idx; | ||||
| } | ||||
| 
 | ||||
| #define kvm_for_each_memslot(memslot, slots)	\ | ||||
| @ -622,6 +616,7 @@ void kvm_exit(void); | ||||
| 
 | ||||
| void kvm_get_kvm(struct kvm *kvm); | ||||
| void kvm_put_kvm(struct kvm *kvm); | ||||
| void kvm_put_kvm_no_destroy(struct kvm *kvm); | ||||
| 
 | ||||
| static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) | ||||
| { | ||||
| @ -813,6 +808,8 @@ void kvm_reload_remote_mmus(struct kvm *kvm); | ||||
| bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, | ||||
| 				 unsigned long *vcpu_bitmap, cpumask_var_t tmp); | ||||
| bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); | ||||
| bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, | ||||
| 				unsigned long *vcpu_bitmap); | ||||
| 
 | ||||
| long kvm_arch_dev_ioctl(struct file *filp, | ||||
| 			unsigned int ioctl, unsigned long arg); | ||||
|  | ||||
| @ -1336,6 +1336,8 @@ extern void perf_event_disable_local(struct perf_event *event); | ||||
| extern void perf_event_disable_inatomic(struct perf_event *event); | ||||
| extern void perf_event_task_tick(void); | ||||
| extern int perf_event_account_interrupt(struct perf_event *event); | ||||
| extern int perf_event_period(struct perf_event *event, u64 value); | ||||
| extern u64 perf_event_pause(struct perf_event *event, bool reset); | ||||
| #else /* !CONFIG_PERF_EVENTS: */ | ||||
| static inline void * | ||||
| perf_aux_output_begin(struct perf_output_handle *handle, | ||||
| @ -1415,6 +1417,14 @@ static inline void perf_event_disable(struct perf_event *event)		{ } | ||||
| static inline int __perf_event_disable(void *info)			{ return -1; } | ||||
| static inline void perf_event_task_tick(void)				{ } | ||||
| static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; } | ||||
| static inline int perf_event_period(struct perf_event *event, u64 value) | ||||
| { | ||||
| 	return -EINVAL; | ||||
| } | ||||
| static inline u64 perf_event_pause(struct perf_event *event, bool reset) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) | ||||
|  | ||||
| @ -1006,8 +1006,9 @@ struct kvm_ppc_resize_hpt { | ||||
| #define KVM_CAP_PMU_EVENT_FILTER 173 | ||||
| #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 | ||||
| #define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 | ||||
| #define KVM_CAP_ARM_NISV_TO_USER 176 | ||||
| #define KVM_CAP_ARM_INJECT_EXT_DABT 177 | ||||
| #define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 | ||||
| #define KVM_CAP_ARM_NISV_TO_USER 177 | ||||
| #define KVM_CAP_ARM_INJECT_EXT_DABT 178 | ||||
| 
 | ||||
| #ifdef KVM_CAP_IRQ_ROUTING | ||||
| 
 | ||||
| @ -1347,6 +1348,7 @@ struct kvm_s390_ucas_mapping { | ||||
| #define KVM_PPC_GET_CPU_CHAR	  _IOR(KVMIO,  0xb1, struct kvm_ppc_cpu_char) | ||||
| /* Available with KVM_CAP_PMU_EVENT_FILTER */ | ||||
| #define KVM_SET_PMU_EVENT_FILTER  _IOW(KVMIO,  0xb2, struct kvm_pmu_event_filter) | ||||
| #define KVM_PPC_SVM_OFF		  _IO(KVMIO,  0xb3) | ||||
| 
 | ||||
| /* ioctl for vm fd */ | ||||
| #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device) | ||||
|  | ||||
| @ -5029,6 +5029,24 @@ static void _perf_event_reset(struct perf_event *event) | ||||
| 	perf_event_update_userpage(event); | ||||
| } | ||||
| 
 | ||||
| /* Assume it's not an event with inherit set. */ | ||||
| u64 perf_event_pause(struct perf_event *event, bool reset) | ||||
| { | ||||
| 	struct perf_event_context *ctx; | ||||
| 	u64 count; | ||||
| 
 | ||||
| 	ctx = perf_event_ctx_lock(event); | ||||
| 	WARN_ON_ONCE(event->attr.inherit); | ||||
| 	_perf_event_disable(event); | ||||
| 	count = local64_read(&event->count); | ||||
| 	if (reset) | ||||
| 		local64_set(&event->count, 0); | ||||
| 	perf_event_ctx_unlock(event, ctx); | ||||
| 
 | ||||
| 	return count; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(perf_event_pause); | ||||
| 
 | ||||
| /*
 | ||||
|  * Holding the top-level event's child_mutex means that any | ||||
|  * descendant process that has inherited this event will block | ||||
| @ -5106,16 +5124,11 @@ static int perf_event_check_period(struct perf_event *event, u64 value) | ||||
| 	return event->pmu->check_period(event, value); | ||||
| } | ||||
| 
 | ||||
| static int perf_event_period(struct perf_event *event, u64 __user *arg) | ||||
| static int _perf_event_period(struct perf_event *event, u64 value) | ||||
| { | ||||
| 	u64 value; | ||||
| 
 | ||||
| 	if (!is_sampling_event(event)) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	if (copy_from_user(&value, arg, sizeof(value))) | ||||
| 		return -EFAULT; | ||||
| 
 | ||||
| 	if (!value) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| @ -5133,6 +5146,19 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int perf_event_period(struct perf_event *event, u64 value) | ||||
| { | ||||
| 	struct perf_event_context *ctx; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	ctx = perf_event_ctx_lock(event); | ||||
| 	ret = _perf_event_period(event, value); | ||||
| 	perf_event_ctx_unlock(event, ctx); | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(perf_event_period); | ||||
| 
 | ||||
| static const struct file_operations perf_fops; | ||||
| 
 | ||||
| static inline int perf_fget_light(int fd, struct fd *p) | ||||
| @ -5176,8 +5202,14 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon | ||||
| 		return _perf_event_refresh(event, arg); | ||||
| 
 | ||||
| 	case PERF_EVENT_IOC_PERIOD: | ||||
| 		return perf_event_period(event, (u64 __user *)arg); | ||||
| 	{ | ||||
| 		u64 value; | ||||
| 
 | ||||
| 		if (copy_from_user(&value, (u64 __user *)arg, sizeof(value))) | ||||
| 			return -EFAULT; | ||||
| 
 | ||||
| 		return _perf_event_period(event, value); | ||||
| 	} | ||||
| 	case PERF_EVENT_IOC_ID: | ||||
| 	{ | ||||
| 		u64 id = primary_event_id(event); | ||||
|  | ||||
mm/ksm.c
							| @ -2478,6 +2478,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(ksm_madvise); | ||||
| 
 | ||||
| int __ksm_enter(struct mm_struct *mm) | ||||
| { | ||||
|  | ||||
tools/testing/selftests/kvm/.gitignore
| @ -13,6 +13,7 @@ | ||||
| /x86_64/vmx_dirty_log_test | ||||
| /x86_64/vmx_set_nested_state_test | ||||
| /x86_64/vmx_tsc_adjust_test | ||||
| /x86_64/xss_msr_test | ||||
| /clear_dirty_log_test | ||||
| /dirty_log_test | ||||
| /kvm_create_max_vcpus | ||||
|  | ||||
| @ -25,6 +25,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test | ||||
| TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test | ||||
| TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test | ||||
| TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test | ||||
| TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test | ||||
| TEST_GEN_PROGS_x86_64 += clear_dirty_log_test | ||||
| TEST_GEN_PROGS_x86_64 += dirty_log_test | ||||
| TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus | ||||
|  | ||||
| @ -308,6 +308,8 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); | ||||
| void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, | ||||
| 		     struct kvm_x86_state *state); | ||||
| 
 | ||||
| struct kvm_msr_list *kvm_get_msr_index_list(void); | ||||
| 
 | ||||
| struct kvm_cpuid2 *kvm_get_supported_cpuid(void); | ||||
| void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, | ||||
| 		    struct kvm_cpuid2 *cpuid); | ||||
| @ -322,10 +324,13 @@ kvm_get_supported_cpuid_entry(uint32_t function) | ||||
| } | ||||
| 
 | ||||
| uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); | ||||
| int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, | ||||
| 		  uint64_t msr_value); | ||||
| void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, | ||||
| 	  	  uint64_t msr_value); | ||||
| 
 | ||||
| uint32_t kvm_get_cpuid_max(void); | ||||
| uint32_t kvm_get_cpuid_max_basic(void); | ||||
| uint32_t kvm_get_cpuid_max_extended(void); | ||||
| void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); | ||||
| 
 | ||||
| /*
 | ||||
|  | ||||
| @ -29,12 +29,9 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus) | ||||
| 
 | ||||
| 	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); | ||||
| 
 | ||||
| 	for (i = 0; i < num_vcpus; i++) { | ||||
| 		int vcpu_id = first_vcpu_id + i; | ||||
| 
 | ||||
| 	for (i = first_vcpu_id; i < first_vcpu_id + num_vcpus; i++) | ||||
| 		/* This asserts that the vCPU was created. */ | ||||
| 		vm_vcpu_add(vm, vcpu_id); | ||||
| 	} | ||||
| 		vm_vcpu_add(vm, i); | ||||
| 
 | ||||
| 	kvm_vm_free(vm); | ||||
| } | ||||
|  | ||||
| @ -869,6 +869,39 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) | ||||
| 	return buffer.entry.data; | ||||
| } | ||||
| 
 | ||||
| /* _VCPU Set MSR
 | ||||
|  * | ||||
|  * Input Args: | ||||
|  *   vm - Virtual Machine | ||||
|  *   vcpuid - VCPU ID | ||||
|  *   msr_index - Index of MSR | ||||
|  *   msr_value - New value of MSR | ||||
|  * | ||||
|  * Output Args: None | ||||
|  * | ||||
|  * Return: The result of KVM_SET_MSRS. | ||||
|  * | ||||
|  * Sets the value of an MSR for the given VCPU. | ||||
|  */ | ||||
| int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, | ||||
| 		  uint64_t msr_value) | ||||
| { | ||||
| 	struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||||
| 	struct { | ||||
| 		struct kvm_msrs header; | ||||
| 		struct kvm_msr_entry entry; | ||||
| 	} buffer = {}; | ||||
| 	int r; | ||||
| 
 | ||||
| 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||||
| 	memset(&buffer, 0, sizeof(buffer)); | ||||
| 	buffer.header.nmsrs = 1; | ||||
| 	buffer.entry.index = msr_index; | ||||
| 	buffer.entry.data = msr_value; | ||||
| 	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); | ||||
| 	return r; | ||||
| } | ||||
| 
 | ||||
| /* VCPU Set MSR
 | ||||
|  * | ||||
|  * Input Args: | ||||
| @ -886,19 +919,9 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) | ||||
| void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, | ||||
| 	uint64_t msr_value) | ||||
| { | ||||
| 	struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||||
| 	struct { | ||||
| 		struct kvm_msrs header; | ||||
| 		struct kvm_msr_entry entry; | ||||
| 	} buffer = {}; | ||||
| 	int r; | ||||
| 
 | ||||
| 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||||
| 	memset(&buffer, 0, sizeof(buffer)); | ||||
| 	buffer.header.nmsrs = 1; | ||||
| 	buffer.entry.index = msr_index; | ||||
| 	buffer.entry.data = msr_value; | ||||
| 	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); | ||||
| 	r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value); | ||||
| 	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" | ||||
| 		"  rc: %i errno: %i", r, errno); | ||||
| } | ||||
| @ -1000,19 +1023,45 @@ struct kvm_x86_state { | ||||
| 	struct kvm_msrs msrs; | ||||
| }; | ||||
| 
 | ||||
| static int kvm_get_num_msrs(struct kvm_vm *vm) | ||||
| static int kvm_get_num_msrs_fd(int kvm_fd) | ||||
| { | ||||
| 	struct kvm_msr_list nmsrs; | ||||
| 	int r; | ||||
| 
 | ||||
| 	nmsrs.nmsrs = 0; | ||||
| 	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); | ||||
| 	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); | ||||
| 	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", | ||||
| 		r); | ||||
| 
 | ||||
| 	return nmsrs.nmsrs; | ||||
| } | ||||
| 
 | ||||
| static int kvm_get_num_msrs(struct kvm_vm *vm) | ||||
| { | ||||
| 	return kvm_get_num_msrs_fd(vm->kvm_fd); | ||||
| } | ||||
| 
 | ||||
| struct kvm_msr_list *kvm_get_msr_index_list(void) | ||||
| { | ||||
| 	struct kvm_msr_list *list; | ||||
| 	int nmsrs, r, kvm_fd; | ||||
| 
 | ||||
| 	kvm_fd = open(KVM_DEV_PATH, O_RDONLY); | ||||
| 	if (kvm_fd < 0) | ||||
| 		exit(KSFT_SKIP); | ||||
| 
 | ||||
| 	nmsrs = kvm_get_num_msrs_fd(kvm_fd); | ||||
| 	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); | ||||
| 	list->nmsrs = nmsrs; | ||||
| 	r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); | ||||
| 	close(kvm_fd); | ||||
| 
 | ||||
| 	TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", | ||||
| 		r); | ||||
| 
 | ||||
| 	return list; | ||||
| } | ||||
| 
 | ||||
| struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) | ||||
| { | ||||
| 	struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||||
| @ -1158,7 +1207,12 @@ bool is_intel_cpu(void) | ||||
| 	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); | ||||
| } | ||||
| 
 | ||||
| uint32_t kvm_get_cpuid_max(void) | ||||
| uint32_t kvm_get_cpuid_max_basic(void) | ||||
| { | ||||
| 	return kvm_get_supported_cpuid_entry(0)->eax; | ||||
| } | ||||
| 
 | ||||
| uint32_t kvm_get_cpuid_max_extended(void) | ||||
| { | ||||
| 	return kvm_get_supported_cpuid_entry(0x80000000)->eax; | ||||
| } | ||||
| @ -1169,7 +1223,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) | ||||
| 	bool pae; | ||||
| 
 | ||||
| 	/* SDM 4.1.4 */ | ||||
| 	if (kvm_get_cpuid_max() < 0x80000008) { | ||||
| 	if (kvm_get_cpuid_max_extended() < 0x80000008) { | ||||
| 		pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); | ||||
| 		*pa_bits = pae ? 36 : 32; | ||||
| 		*va_bits = 32; | ||||
|  | ||||
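The selftest library now splits MSR writes into a non-asserting _vcpu_set_msr(), which returns the raw KVM_SET_MSRS result, and the asserting vcpu_set_msr() wrapper, and adds kvm_get_msr_index_list() plus separate CPUID-max helpers. A small sketch of how a test might use the non-asserting variant to probe an MSR write without aborting follows; the helper name and the reporting are assumptions for illustration:

#include <stdio.h>
#include "kvm_util.h"
#include "processor.h"

/*
 * Sketch only: probe whether the vCPU accepts a given MSR value.
 * KVM_SET_MSRS returns the number of MSRs written, so 1 means success.
 */
static bool msr_write_accepted(struct kvm_vm *vm, uint32_t vcpuid,
			       uint64_t index, uint64_t value)
{
	int r = _vcpu_set_msr(vm, vcpuid, index, value);

	if (r != 1)
		printf("MSR 0x%llx rejected value 0x%llx (r = %d)\n",
		       (unsigned long long)index,
		       (unsigned long long)value, r);
	return r == 1;
}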
| @ -25,12 +25,15 @@ | ||||
| 
 | ||||
| static void guest_code(void) | ||||
| { | ||||
| 	register u64 stage asm("11") = 0; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		GUEST_SYNC(0); | ||||
| 		asm volatile ("ahi %0,1" : : "r"(stage)); | ||||
| 	} | ||||
| 	/*
 | ||||
| 	 * We embed diag 501 here instead of doing a ucall to avoid that | ||||
| 	 * the compiler has messed with r11 at the time of the ucall. | ||||
| 	 */ | ||||
| 	asm volatile ( | ||||
| 		"0:	diag 0,0,0x501\n" | ||||
| 		"	ahi 11,1\n" | ||||
| 		"	j 0b\n" | ||||
| 	); | ||||
| } | ||||
| 
 | ||||
| #define REG_COMPARE(reg) \ | ||||
|  | ||||
							
								
								
									
tools/testing/selftests/kvm/x86_64/xss_msr_test.c (new file, 76 lines)
							| @ -0,0 +1,76 @@ | ||||
| // SPDX-License-Identifier: GPL-2.0
 | ||||
| /*
 | ||||
|  * Copyright (C) 2019, Google LLC. | ||||
|  * | ||||
|  * Tests for the IA32_XSS MSR. | ||||
|  */ | ||||
| 
 | ||||
| #define _GNU_SOURCE /* for program_invocation_short_name */ | ||||
| #include <sys/ioctl.h> | ||||
| 
 | ||||
| #include "test_util.h" | ||||
| #include "kvm_util.h" | ||||
| #include "vmx.h" | ||||
| 
 | ||||
| #define VCPU_ID	      1 | ||||
| #define MSR_BITS      64 | ||||
| 
 | ||||
| #define X86_FEATURE_XSAVES	(1<<3) | ||||
| 
 | ||||
| bool is_supported_msr(u32 msr_index) | ||||
| { | ||||
| 	struct kvm_msr_list *list; | ||||
| 	bool found = false; | ||||
| 	int i; | ||||
| 
 | ||||
| 	list = kvm_get_msr_index_list(); | ||||
| 	for (i = 0; i < list->nmsrs; ++i) { | ||||
| 		if (list->indices[i] == msr_index) { | ||||
| 			found = true; | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	free(list); | ||||
| 	return found; | ||||
| } | ||||
| 
 | ||||
| int main(int argc, char *argv[]) | ||||
| { | ||||
| 	struct kvm_cpuid_entry2 *entry; | ||||
| 	bool xss_supported = false; | ||||
| 	struct kvm_vm *vm; | ||||
| 	uint64_t xss_val; | ||||
| 	int i, r; | ||||
| 
 | ||||
| 	/* Create VM */ | ||||
| 	vm = vm_create_default(VCPU_ID, 0, 0); | ||||
| 
 | ||||
| 	if (kvm_get_cpuid_max_basic() >= 0xd) { | ||||
| 		entry = kvm_get_supported_cpuid_index(0xd, 1); | ||||
| 		xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES); | ||||
| 	} | ||||
| 	if (!xss_supported) { | ||||
| 		printf("IA32_XSS is not supported by the vCPU.\n"); | ||||
| 		exit(KSFT_SKIP); | ||||
| 	} | ||||
| 
 | ||||
| 	xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS); | ||||
| 	TEST_ASSERT(xss_val == 0, | ||||
| 		    "MSR_IA32_XSS should be initialized to zero\n"); | ||||
| 
 | ||||
| 	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val); | ||||
| 	/*
 | ||||
| 	 * At present, KVM only supports a guest IA32_XSS value of 0. Verify | ||||
| 	 * that trying to set the guest IA32_XSS to an unsupported value fails. | ||||
| 	 * Also, in the future when a non-zero value succeeds check that | ||||
| 	 * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST. | ||||
| 	 */ | ||||
| 	for (i = 0; i < MSR_BITS; ++i) { | ||||
| 		r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i); | ||||
| 		TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS), | ||||
| 			    "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n"); | ||||
| 	} | ||||
| 
 | ||||
| 	kvm_vm_free(vm); | ||||
| } | ||||
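The loop above only expects rejected writes today. If KVM ever accepts a non-zero IA32_XSS bit, a follow-up check along these lines would cover the readback and the MSR-list advertisement the in-test comment asks for; this is a sketch building on the test's own VCPU_ID and is_supported_msr(), not part of the test as merged:

/*
 * Sketch only: what the "future" check mentioned in the test comment
 * might look like once a non-zero IA32_XSS bit is accepted.
 */
static void check_xss_bit(struct kvm_vm *vm, int bit)
{
	uint64_t val = 1ull << bit;

	if (_vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, val) != 1)
		return;	/* write rejected: nothing further to verify */

	TEST_ASSERT(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS) == val,
		    "IA32_XSS readback mismatch for bit %d", bit);
	TEST_ASSERT(is_supported_msr(MSR_IA32_XSS),
		    "IA32_XSS accepted a write but is absent from KVM_GET_MSR_INDEX_LIST");
}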
| @ -110,14 +110,11 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = { | ||||
| int kvm_coalesced_mmio_init(struct kvm *kvm) | ||||
| { | ||||
| 	struct page *page; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	ret = -ENOMEM; | ||||
| 	page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||||
| 	if (!page) | ||||
| 		goto out_err; | ||||
| 		return -ENOMEM; | ||||
| 
 | ||||
| 	ret = 0; | ||||
| 	kvm->coalesced_mmio_ring = page_address(page); | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -128,8 +125,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) | ||||
| 	spin_lock_init(&kvm->ring_lock); | ||||
| 	INIT_LIST_HEAD(&kvm->coalesced_zones); | ||||
| 
 | ||||
| out_err: | ||||
| 	return ret; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| void kvm_coalesced_mmio_free(struct kvm *kvm) | ||||
|  | ||||
| @ -838,6 +838,18 @@ void kvm_put_kvm(struct kvm *kvm) | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_put_kvm); | ||||
| 
 | ||||
| /*
 | ||||
|  * Used to put a reference that was taken on behalf of an object associated | ||||
|  * with a user-visible file descriptor, e.g. a vcpu or device, if installation | ||||
|  * of the new file descriptor fails and the reference cannot be transferred to | ||||
|  * its final owner.  In such cases, the caller is still actively using @kvm and | ||||
|  * will fail miserably if the refcount unexpectedly hits zero. | ||||
|  */ | ||||
| void kvm_put_kvm_no_destroy(struct kvm *kvm) | ||||
| { | ||||
| 	WARN_ON(refcount_dec_and_test(&kvm->users_count)); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_put_kvm_no_destroy); | ||||
| 
 | ||||
| static int kvm_vm_release(struct inode *inode, struct file *filp) | ||||
| { | ||||
| @ -2739,17 +2751,18 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) | ||||
| 		goto unlock_vcpu_destroy; | ||||
| 	} | ||||
| 
 | ||||
| 	BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); | ||||
| 	vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); | ||||
| 	BUG_ON(kvm->vcpus[vcpu->vcpu_idx]); | ||||
| 
 | ||||
| 	/* Now it's all set up, let userspace reach it */ | ||||
| 	kvm_get_kvm(kvm); | ||||
| 	r = create_vcpu_fd(vcpu); | ||||
| 	if (r < 0) { | ||||
| 		kvm_put_kvm(kvm); | ||||
| 		kvm_put_kvm_no_destroy(kvm); | ||||
| 		goto unlock_vcpu_destroy; | ||||
| 	} | ||||
| 
 | ||||
| 	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; | ||||
| 	kvm->vcpus[vcpu->vcpu_idx] = vcpu; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Pairs with smp_rmb() in kvm_get_vcpu.  Write kvm->vcpus | ||||
| @ -3183,7 +3196,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, | ||||
| 	kvm_get_kvm(kvm); | ||||
| 	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); | ||||
| 	if (ret < 0) { | ||||
| 		kvm_put_kvm(kvm); | ||||
| 		kvm_put_kvm_no_destroy(kvm); | ||||
| 		mutex_lock(&kvm->lock); | ||||
| 		list_del(&dev->vm_node); | ||||
| 		mutex_unlock(&kvm->lock); | ||||
| @ -4341,12 +4354,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | ||||
| 
 | ||||
| 	r = kvm_arch_hardware_setup(); | ||||
| 	if (r < 0) | ||||
| 		goto out_free_0a; | ||||
| 		goto out_free_1; | ||||
| 
 | ||||
| 	for_each_online_cpu(cpu) { | ||||
| 		smp_call_function_single(cpu, check_processor_compat, &r, 1); | ||||
| 		if (r < 0) | ||||
| 			goto out_free_1; | ||||
| 			goto out_free_2; | ||||
| 	} | ||||
| 
 | ||||
| 	r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting", | ||||
| @ -4403,9 +4416,8 @@ out_free_3: | ||||
| 	unregister_reboot_notifier(&kvm_reboot_notifier); | ||||
| 	cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); | ||||
| out_free_2: | ||||
| out_free_1: | ||||
| 	kvm_arch_hardware_unsetup(); | ||||
| out_free_0a: | ||||
| out_free_1: | ||||
| 	free_cpumask_var(cpus_hardware_enabled); | ||||
| out_free_0: | ||||
| 	kvm_irqfd_exit(); | ||||
|  | ||||
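kvm_put_kvm_no_destroy() exists for the narrow case documented above: a reference was taken on behalf of a file descriptor that was never installed, and the caller still holds and uses its own references to the VM. A sketch of the pattern follows; struct my_obj, my_obj_fops and the function name are invented for illustration, only kvm_get_kvm(), kvm_put_kvm_no_destroy() and anon_inode_getfd() are real kernel interfaces:

#include <linux/kvm_host.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>

/*
 * Sketch only: take a reference owned by the new file descriptor, and
 * drop it without tearing the VM down if installing the fd fails,
 * since the caller is still actively using @kvm.
 */
static int example_create_object_fd(struct kvm *kvm, struct my_obj *obj)
{
	int fd;

	kvm_get_kvm(kvm);	/* reference to be owned by the new fd */
	fd = anon_inode_getfd("my-obj", &my_obj_fops, obj,
			      O_RDWR | O_CLOEXEC);
	if (fd < 0)
		kvm_put_kvm_no_destroy(kvm);	/* fd was never created */
	return fd;
}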