Bugfixes for x86, PPC and s390.
-----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQEcBAABAgAGBQJZoEmaAAoJEL/70l94x66DmnMH/17uzxBe3UksLBKWC5grWhRq GVlHVI+XH7jPub1hfqKkj09nnJ0OJAiO87vX9A/CCobtxLDk0UB02U2qv+jbFbmN mSkAovY8Rn4YR73SqU+XTYajnnwmYsEiPuHVUDbMaKY3yBLW/BYtSqCuAHSm3NrS UQO8DvQAY7+W7/gA9QY7aaK/sc8N6oAwE4DHsxTYKR70Eax4SjjMLWYQY7oSutTx U8XpguF5CwP8iYbsF++WkNYxe85piheWIpUIKg+3pYxKgpDNBST8ROmxmuvSdAh6 1hkXy2qxpw+YYM6JkHRb7kBpuUAGqzYNrEF/c2Wfor+gufsyoq8LQSq5pB+d/5I= =M40T -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull Paolo Bonzini: "Bugfixes for x86, PPC and s390" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: PPC: Book3S: Fix race and leak in kvm_vm_ioctl_create_spapr_tce() KVM, pkeys: do not use PKRU value in vcpu->arch.guest_fpu.state KVM: x86: simplify handling of PKRU KVM: x86: block guest protection keys unless the host has them enabled KVM: PPC: Book3S HV: Add missing barriers to XIVE code and document them KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug causing IPB bit loss KVM: PPC: Book3S HV: Use msgsync with hypervisor doorbells on POWER9 KVM: s390: sthyi: fix specification exception detection KVM: s390: sthyi: fix sthyi inline assembly
This commit is contained in:
commit
67a3b5cb33
@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
||||
struct kvm_create_spapr_tce_64 *args)
|
||||
{
|
||||
struct kvmppc_spapr_tce_table *stt = NULL;
|
||||
struct kvmppc_spapr_tce_table *siter;
|
||||
unsigned long npages, size;
|
||||
int ret = -ENOMEM;
|
||||
int i;
|
||||
int fd = -1;
|
||||
|
||||
if (!args->size)
|
||||
return -EINVAL;
|
||||
|
||||
/* Check this LIOBN hasn't been previously allocated */
|
||||
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
|
||||
if (stt->liobn == args->liobn)
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
|
||||
npages = kvmppc_tce_pages(size);
|
||||
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
|
||||
if (ret) {
|
||||
stt = NULL;
|
||||
goto fail;
|
||||
}
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = -ENOMEM;
|
||||
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
|
||||
GFP_KERNEL);
|
||||
if (!stt)
|
||||
goto fail;
|
||||
goto fail_acct;
|
||||
|
||||
stt->liobn = args->liobn;
|
||||
stt->page_shift = args->page_shift;
|
||||
@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
kvm_get_kvm(kvm);
|
||||
ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
|
||||
stt, O_RDWR | O_CLOEXEC);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
|
||||
|
||||
/* Check this LIOBN hasn't been previously allocated */
|
||||
ret = 0;
|
||||
list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
|
||||
if (siter->liobn == args->liobn) {
|
||||
ret = -EBUSY;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
|
||||
kvm_get_kvm(kvm);
|
||||
}
|
||||
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
|
||||
stt, O_RDWR | O_CLOEXEC);
|
||||
if (!ret)
|
||||
return fd;
|
||||
|
||||
fail:
|
||||
if (stt) {
|
||||
for (i = 0; i < npages; i++)
|
||||
if (stt->pages[i])
|
||||
__free_page(stt->pages[i]);
|
||||
put_unused_fd(fd);
|
||||
|
||||
kfree(stt);
|
||||
}
|
||||
fail:
|
||||
for (i = 0; i < npages; i++)
|
||||
if (stt->pages[i])
|
||||
__free_page(stt->pages[i]);
|
||||
|
||||
kfree(stt);
|
||||
fail_acct:
|
||||
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||
/* Hypervisor doorbell - exit only if host IPI flag set */
|
||||
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
|
||||
bne 3f
|
||||
BEGIN_FTR_SECTION
|
||||
PPC_MSGSYNC
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
lbz r0, HSTATE_HOST_IPI(r13)
|
||||
cmpwi r0, 0
|
||||
beq 4f
|
||||
|
@ -16,7 +16,22 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
|
||||
u8 cppr;
|
||||
u16 ack;
|
||||
|
||||
/* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
|
||||
/*
|
||||
* Ensure any previous store to CPPR is ordered vs.
|
||||
* the subsequent loads from PIPR or ACK.
|
||||
*/
|
||||
eieio();
|
||||
|
||||
/*
|
||||
* DD1 bug workaround: If PIPR is less favored than CPPR
|
||||
* ignore the interrupt or we might incorrectly lose an IPB
|
||||
* bit.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
|
||||
u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
|
||||
if (pipr >= xc->hw_cppr)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Perform the acknowledge OS to register cycle. */
|
||||
ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
|
||||
@ -235,6 +250,11 @@ skip_ipi:
|
||||
/*
|
||||
* If we found an interrupt, adjust what the guest CPPR should
|
||||
* be as if we had just fetched that interrupt from HW.
|
||||
*
|
||||
* Note: This can only make xc->cppr smaller as the previous
|
||||
* loop will only exit with hirq != 0 if prio is lower than
|
||||
* the current xc->cppr. Thus we don't need to re-check xc->mfrr
|
||||
* for pending IPIs.
|
||||
*/
|
||||
if (hirq)
|
||||
xc->cppr = prio;
|
||||
@ -380,6 +400,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
|
||||
old_cppr = xc->cppr;
|
||||
xc->cppr = cppr;
|
||||
|
||||
/*
|
||||
* Order the above update of xc->cppr with the subsequent
|
||||
* read of xc->mfrr inside push_pending_to_hw()
|
||||
*/
|
||||
smp_mb();
|
||||
|
||||
/*
|
||||
* We are masking less, we need to look for pending things
|
||||
* to deliver and set VP pending bits accordingly to trigger
|
||||
@ -420,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
|
||||
* used to signal MFRR changes is EOId when fetched from
|
||||
* the queue.
|
||||
*/
|
||||
if (irq == XICS_IPI || irq == 0)
|
||||
if (irq == XICS_IPI || irq == 0) {
|
||||
/*
|
||||
* This barrier orders the setting of xc->cppr vs.
|
||||
* subsquent test of xc->mfrr done inside
|
||||
* scan_interrupts and push_pending_to_hw
|
||||
*/
|
||||
smp_mb();
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Find interrupt source */
|
||||
sb = kvmppc_xive_find_source(xive, irq, &src);
|
||||
if (!sb) {
|
||||
pr_devel(" source not found !\n");
|
||||
rc = H_PARAMETER;
|
||||
/* Same as above */
|
||||
smp_mb();
|
||||
goto bail;
|
||||
}
|
||||
state = &sb->irq_state[src];
|
||||
kvmppc_xive_select_irq(state, &hw_num, &xd);
|
||||
|
||||
state->in_eoi = true;
|
||||
mb();
|
||||
|
||||
/*
|
||||
* This barrier orders both setting of in_eoi above vs,
|
||||
* subsequent test of guest_priority, and the setting
|
||||
* of xc->cppr vs. subsquent test of xc->mfrr done inside
|
||||
* scan_interrupts and push_pending_to_hw
|
||||
*/
|
||||
smp_mb();
|
||||
|
||||
again:
|
||||
if (state->guest_priority == MASKED) {
|
||||
@ -461,6 +503,14 @@ again:
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* This barrier orders the above guest_priority check
|
||||
* and spin_lock/unlock with clearing in_eoi below.
|
||||
*
|
||||
* It also has to be a full mb() as it must ensure
|
||||
* the MMIOs done in source_eoi() are completed before
|
||||
* state->in_eoi is visible.
|
||||
*/
|
||||
mb();
|
||||
state->in_eoi = false;
|
||||
bail:
|
||||
@ -495,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
|
||||
/* Locklessly write over MFRR */
|
||||
xc->mfrr = mfrr;
|
||||
|
||||
/*
|
||||
* The load of xc->cppr below and the subsequent MMIO store
|
||||
* to the IPI must happen after the above mfrr update is
|
||||
* globally visible so that:
|
||||
*
|
||||
* - Synchronize with another CPU doing an H_EOI or a H_CPPR
|
||||
* updating xc->cppr then reading xc->mfrr.
|
||||
*
|
||||
* - The target of the IPI sees the xc->mfrr update
|
||||
*/
|
||||
mb();
|
||||
|
||||
/* Shoot the IPI if most favored than target cppr */
|
||||
if (mfrr < xc->cppr)
|
||||
__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
|
||||
|
@ -394,7 +394,7 @@ static int sthyi(u64 vaddr)
|
||||
"srl %[cc],28\n"
|
||||
: [cc] "=d" (cc)
|
||||
: [code] "d" (code), [addr] "a" (addr)
|
||||
: "memory", "cc");
|
||||
: "3", "memory", "cc");
|
||||
return cc;
|
||||
}
|
||||
|
||||
@ -425,7 +425,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
|
||||
VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
|
||||
trace_kvm_s390_handle_sthyi(vcpu, code, addr);
|
||||
|
||||
if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
|
||||
if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
if (code & 0xffff) {
|
||||
@ -433,6 +433,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (addr & ~PAGE_MASK)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
|
||||
/*
|
||||
* If the page has not yet been faulted in, we want to do that
|
||||
* now and not after all the expensive calculations.
|
||||
|
@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
|
||||
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
|
||||
{
|
||||
if (use_xsave()) {
|
||||
copy_kernel_to_xregs(&fpstate->xsave, -1);
|
||||
copy_kernel_to_xregs(&fpstate->xsave, mask);
|
||||
} else {
|
||||
if (use_fxsr())
|
||||
copy_kernel_to_fxregs(&fpstate->fxsave);
|
||||
@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
|
||||
: : [addr] "m" (fpstate));
|
||||
}
|
||||
|
||||
__copy_kernel_to_fpregs(fpstate);
|
||||
__copy_kernel_to_fpregs(fpstate, -1);
|
||||
}
|
||||
|
||||
extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
|
||||
|
@ -492,6 +492,7 @@ struct kvm_vcpu_arch {
|
||||
unsigned long cr4;
|
||||
unsigned long cr4_guest_owned_bits;
|
||||
unsigned long cr8;
|
||||
u32 pkru;
|
||||
u32 hflags;
|
||||
u64 efer;
|
||||
u64 apic_base;
|
||||
|
@ -469,7 +469,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
||||
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
|
||||
cpuid_mask(&entry->ecx, CPUID_7_ECX);
|
||||
/* PKU is not yet implemented for shadow paging. */
|
||||
if (!tdp_enabled)
|
||||
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
|
||||
entry->ecx &= ~F(PKU);
|
||||
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
|
||||
entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
|
||||
|
@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
|
||||
| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
|
||||
}
|
||||
|
||||
static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_x86_ops->get_pkru(vcpu);
|
||||
}
|
||||
|
||||
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hflags |= HF_GUEST_MASK;
|
||||
|
@ -185,7 +185,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
|
||||
* index of the protection domain, so pte_pkey * 2 is
|
||||
* is the index of the first bit for the domain.
|
||||
*/
|
||||
pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
|
||||
pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
|
||||
|
||||
/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
|
||||
offset = (pfec & ~1) +
|
||||
|
@ -1777,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
||||
to_svm(vcpu)->vmcb->save.rflags = rflags;
|
||||
}
|
||||
|
||||
static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
|
||||
{
|
||||
switch (reg) {
|
||||
@ -5413,8 +5408,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
|
||||
.get_rflags = svm_get_rflags,
|
||||
.set_rflags = svm_set_rflags,
|
||||
|
||||
.get_pkru = svm_get_pkru,
|
||||
|
||||
.tlb_flush = svm_flush_tlb,
|
||||
|
||||
.run = svm_vcpu_run,
|
||||
|
@ -636,8 +636,6 @@ struct vcpu_vmx {
|
||||
|
||||
u64 current_tsc_ratio;
|
||||
|
||||
bool guest_pkru_valid;
|
||||
u32 guest_pkru;
|
||||
u32 host_pkru;
|
||||
|
||||
/*
|
||||
@ -2383,11 +2381,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
|
||||
to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
|
||||
}
|
||||
|
||||
static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return to_vmx(vcpu)->guest_pkru;
|
||||
}
|
||||
|
||||
static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
|
||||
@ -9020,8 +9013,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
|
||||
vmx_set_interrupt_shadow(vcpu, 0);
|
||||
|
||||
if (vmx->guest_pkru_valid)
|
||||
__write_pkru(vmx->guest_pkru);
|
||||
if (static_cpu_has(X86_FEATURE_PKU) &&
|
||||
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
|
||||
vcpu->arch.pkru != vmx->host_pkru)
|
||||
__write_pkru(vcpu->arch.pkru);
|
||||
|
||||
atomic_switch_perf_msrs(vmx);
|
||||
debugctlmsr = get_debugctlmsr();
|
||||
@ -9169,13 +9164,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
* back on host, so it is safe to read guest PKRU from current
|
||||
* XSAVE.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_OSPKE)) {
|
||||
vmx->guest_pkru = __read_pkru();
|
||||
if (vmx->guest_pkru != vmx->host_pkru) {
|
||||
vmx->guest_pkru_valid = true;
|
||||
if (static_cpu_has(X86_FEATURE_PKU) &&
|
||||
kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
|
||||
vcpu->arch.pkru = __read_pkru();
|
||||
if (vcpu->arch.pkru != vmx->host_pkru)
|
||||
__write_pkru(vmx->host_pkru);
|
||||
} else
|
||||
vmx->guest_pkru_valid = false;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -11682,8 +11675,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
|
||||
.get_rflags = vmx_get_rflags,
|
||||
.set_rflags = vmx_set_rflags,
|
||||
|
||||
.get_pkru = vmx_get_pkru,
|
||||
|
||||
.tlb_flush = vmx_flush_tlb,
|
||||
|
||||
.run = vmx_vcpu_run,
|
||||
|
@ -3245,7 +3245,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
|
||||
u32 size, offset, ecx, edx;
|
||||
cpuid_count(XSTATE_CPUID, index,
|
||||
&size, &offset, &ecx, &edx);
|
||||
memcpy(dest + offset, src, size);
|
||||
if (feature == XFEATURE_MASK_PKRU)
|
||||
memcpy(dest + offset, &vcpu->arch.pkru,
|
||||
sizeof(vcpu->arch.pkru));
|
||||
else
|
||||
memcpy(dest + offset, src, size);
|
||||
|
||||
}
|
||||
|
||||
valid -= feature;
|
||||
@ -3283,7 +3288,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
|
||||
u32 size, offset, ecx, edx;
|
||||
cpuid_count(XSTATE_CPUID, index,
|
||||
&size, &offset, &ecx, &edx);
|
||||
memcpy(dest, src + offset, size);
|
||||
if (feature == XFEATURE_MASK_PKRU)
|
||||
memcpy(&vcpu->arch.pkru, src + offset,
|
||||
sizeof(vcpu->arch.pkru));
|
||||
else
|
||||
memcpy(dest, src + offset, size);
|
||||
}
|
||||
|
||||
valid -= feature;
|
||||
@ -7633,7 +7642,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
vcpu->guest_fpu_loaded = 1;
|
||||
__kernel_fpu_begin();
|
||||
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
|
||||
/* PKRU is separately restored in kvm_x86_ops->run. */
|
||||
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
|
||||
~XFEATURE_MASK_PKRU);
|
||||
trace_kvm_fpu(1);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user