Merge branch 'kvm-bugfixes' into HEAD

Merge bugfixes from 5.17 before merging more tricky work.
This commit is contained in:
Paolo Bonzini 2022-03-04 18:39:29 -05:00
commit 0564eeb71b
17 changed files with 131 additions and 53 deletions

View File

@ -1394,7 +1394,7 @@ documentation when it pops into existence).
-------------------
:Capability: KVM_CAP_ENABLE_CAP
:Architectures: mips, ppc, s390
:Architectures: mips, ppc, s390, x86
:Type: vcpu ioctl
:Parameters: struct kvm_enable_cap (in)
:Returns: 0 on success; -1 on error

View File

@ -46,8 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* specification (ARM DEN 0022A). This means all suspend states
* for KVM will preserve the register state.
*/
kvm_vcpu_halt(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
kvm_vcpu_wfi(vcpu);
return PSCI_RET_SUCCESS;
}

View File

@ -704,7 +704,6 @@ struct kvm_vcpu_arch {
struct fpu_guest guest_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
struct kvm_pio_request pio;
void *pio_data;

View File

@ -1558,7 +1558,10 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
if (!guest_fpu)
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */

View File

@ -462,19 +462,24 @@ static bool pv_tlb_flush_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
!boot_cpu_has(X86_FEATURE_MWAIT) &&
(num_possible_cpus() != 1));
}
static bool pv_ipi_supported(void)
{
return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
(num_possible_cpus() != 1));
}
static bool pv_sched_yield_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
!boot_cpu_has(X86_FEATURE_MWAIT) &&
(num_possible_cpus() != 1));
}
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
@ -619,7 +624,7 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
/* Make sure other vCPUs get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
if (vcpu_is_preempted(cpu)) {
if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) {
kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
break;
}

View File

@ -239,6 +239,9 @@ static void __init kvmclock_init_mem(void)
static int __init kvm_setup_vsyscall_timeinfo(void)
{
if (!kvm_para_available() || !kvmclock)
return 0;
kvmclock_init_mem();
#ifdef CONFIG_X86_64

View File

@ -282,6 +282,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
u64 guest_supported_xcr0;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best && apic) {
@ -293,9 +294,11 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_apic_set_version(vcpu);
}
vcpu->arch.guest_supported_xcr0 =
guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0;
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);

View File

@ -3539,7 +3539,7 @@ set_root_pgd:
out_unlock:
write_unlock(&vcpu->kvm->mmu_lock);
return 0;
return r;
}
static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
@ -3865,12 +3865,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}
static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
{
/* make sure the token value is not 0 */
u32 id = vcpu->arch.apf.id;
if (id << 12 == 0)
vcpu->arch.apf.id = 1;
return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
}
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gfn_t gfn)
{
struct kvm_arch_async_pf arch;
arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
arch.token = alloc_apf_token(vcpu);
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);

View File

@ -2705,8 +2705,23 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u64 data = msr->data;
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
if (!msr->host_initiated && !svm->tsc_scaling_enabled)
return 1;
if (!svm->tsc_scaling_enabled) {
if (!msr->host_initiated)
return 1;
/*
* In case TSC scaling is not enabled, always
* leave this MSR at the default value.
*
* Due to bug in qemu 6.2.0, it would try to set
* this msr to 0 if tsc scaling is not enabled.
* Ignore this value as well.
*/
if (data != 0 && data != svm->tsc_ratio_msr)
return 1;
break;
}
if (data & TSC_RATIO_RSVD)
return 1;

View File

@ -246,8 +246,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
src = &prev->host_state;
dest = &vmx->loaded_vmcs->host_state;
vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel,
src->fs_base, src->gs_base);
vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
dest->ds_sel = src->ds_sel;
@ -3056,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr4;
unsigned long cr3, cr4;
bool vm_fail;
if (!nested_early_check)
@ -3079,6 +3078,12 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
*/
vmcs_writel(GUEST_RFLAGS, 0);
cr3 = __get_current_cr3_fast();
if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
vmcs_writel(HOST_CR3, cr3);
vmx->loaded_vmcs->host_state.cr3 = cr3;
}
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);

View File

@ -1075,14 +1075,9 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base)
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base)
{
if (unlikely(cr3 != host->cr3)) {
vmcs_writel(HOST_CR3, cr3);
host->cr3 = cr3;
}
if (unlikely(fs_sel != host->fs_sel)) {
if (!(fs_sel & 7))
vmcs_write16(HOST_FS_SELECTOR, fs_sel);
@ -1177,9 +1172,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
gs_base = segment_base(gs_sel);
#endif
vmx_set_vmcs_host_state(host_state, __get_current_cr3_fast(),
fs_sel, gs_sel, fs_base, gs_base);
vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
vmx->guest_state_loaded = true;
}
@ -6795,7 +6788,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr4;
unsigned long cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@ -6838,6 +6831,19 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0;
/*
* Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
* prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
* it switches back to the current->mm, which can occur in KVM context
* when switching to a temporary mm to patch kernel code, e.g. if KVM
* toggles a static key while handling a VM-Exit.
*/
cr3 = __get_current_cr3_fast();
if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
vmcs_writel(HOST_CR3, cr3);
vmx->loaded_vmcs->host_state.cr3 = cr3;
}
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);

View File

@ -374,9 +374,8 @@ int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_vmcs_host_state(struct vmcs_host_state *host, unsigned long cr3,
u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);

View File

@ -995,6 +995,18 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu)
{
return vcpu->arch.guest_fpu.fpstate->user_xfeatures;
}
#ifdef CONFIG_X86_64
static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
{
return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC;
}
#endif
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
u64 xcr0 = xcr;
@ -1014,7 +1026,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
* saving. However, xcr0 bit 0 is always set, even if the
* emulated CPU does not support XSAVE (see kvm_vcpu_reset()).
*/
valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP;
if (xcr0 & ~valid_bits)
return 1;
@ -2369,10 +2381,12 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
#ifdef CONFIG_X86_64
static inline int gtod_is_based_on_tsc(int mode)
{
return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
#endif
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
@ -3724,8 +3738,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
return 1;
if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
vcpu->arch.guest_supported_xcr0))
if (data & ~kvm_guest_supported_xfd(vcpu))
return 1;
fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
@ -3735,8 +3748,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
return 1;
if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
vcpu->arch.guest_supported_xcr0))
if (data & ~kvm_guest_supported_xfd(vcpu))
return 1;
vcpu->arch.guest_fpu.xfd_err = data;
@ -4252,6 +4264,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_SYS_ATTRIBUTES:
case KVM_CAP_VAPIC:
case KVM_CAP_ENABLE_CAP:
r = 1;
break;
case KVM_CAP_EXIT_HYPERCALL:
@ -8981,6 +8994,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
return -KVM_EOPNOTSUPP;
/*
* When tsc is in permanent catchup mode guests won't be able to use
* pvclock_read_retry loop to get consistent view of pvclock
*/
if (vcpu->arch.tsc_always_catchup)
return -KVM_EOPNOTSUPP;
if (!kvm_get_walltime_and_clockread(&ts, &cycle))
return -KVM_EOPNOTSUPP;
@ -9199,6 +9219,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
likely(!pic_in_kernel(vcpu->kvm));
}
/* Called within kvm->srcu read side. */
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
@ -9207,16 +9228,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
/*
* The call to kvm_ready_for_interrupt_injection() may end up in
* kvm_xen_has_interrupt() which may require the srcu lock to be
* held, to protect against changes in the vcpu_info address.
*/
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_run->ready_for_interrupt_injection =
pic_in_kernel(vcpu->kvm) ||
kvm_vcpu_ready_for_interrupt_injection(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (is_smm(vcpu))
kvm_run->flags |= KVM_RUN_X86_SMM;
@ -9832,6 +9846,7 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
/*
* Called within kvm->srcu read side.
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
* userspace.
@ -10210,6 +10225,7 @@ out:
return r;
}
/* Called within kvm->srcu read side. */
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
bool hv_timer;
@ -10269,12 +10285,12 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
!vcpu->arch.apf.halted);
}
/* Called within kvm->srcu read side. */
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
struct kvm *kvm = vcpu->kvm;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
@ -10302,14 +10318,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (__xfer_to_guest_mode_work_pending()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
r = xfer_to_guest_mode_handle_work(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (r)
return r;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
}
}
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
return r;
}
@ -10412,6 +10426,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
struct kvm *kvm = vcpu->kvm;
int r;
vcpu_load(vcpu);
@ -10419,6 +10434,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_run->flags = 0;
kvm_load_guest_fpu(vcpu);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
r = -EINTR;
@ -10429,7 +10445,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* use before KVM has ever run the vCPU.
*/
WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (kvm_apic_accept_events(vcpu) < 0) {
r = 0;
goto out;
@ -10489,8 +10509,9 @@ out:
if (kvm_run->kvm_valid_regs)
store_regs(vcpu);
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_sigset_deactivate(vcpu);
vcpu_put(vcpu);
return r;
}

View File

@ -366,6 +366,7 @@ static struct kvm_vm *test_vm_create(void)
{
struct kvm_vm *vm;
unsigned int i;
int ret;
int nr_vcpus = test_args.nr_vcpus;
vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
@ -382,7 +383,11 @@ static struct kvm_vm *test_vm_create(void)
ucall_init(vm, NULL);
test_init_timer_irq(vm);
vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
if (ret < 0) {
print_skip("Failed to create vgic-v3");
exit(KSFT_SKIP);
}
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);

View File

@ -761,6 +761,10 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
GICD_BASE_GPA, GICR_BASE_GPA);
if (gic_fd < 0) {
print_skip("Failed to create vgic-v3, skipping");
exit(KSFT_SKIP);
}
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
guest_irq_handlers[args.eoi_split][args.level_sensitive]);

View File

@ -52,7 +52,9 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
nr_vcpus, nr_vcpus_created);
/* Distributor setup */
gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
if (_kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3,
false, &gic_fd) != 0)
return -1;
kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
0, &nr_irqs, true);

View File

@ -5522,9 +5522,7 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
if (kvm_usage_count) {
#ifdef CONFIG_LOCKDEP
WARN_ON(lockdep_is_held(&kvm_count_lock));
#endif
lockdep_assert_not_held(&kvm_count_lock);
hardware_enable_nolock(NULL);
}
}