From 7d4bd1d2819ef1035ba1ed648358df37b51ade6f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 1 Apr 2016 12:12:22 +0100 Subject: [PATCH 1/4] arm64: KVM: Add braces to multi-line if statement in virtual PMU code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel is written in C, not python, so we need braces around multi-line if statements. GCC 6 actually warns about this, thanks to the fantastic new "-Wmisleading-indentation" flag: | virt/kvm/arm/pmu.c: In function ‘kvm_pmu_overflow_status’: | virt/kvm/arm/pmu.c:198:3: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation] | reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); | ^~~ | arch/arm64/kvm/../../../virt/kvm/arm/pmu.c:196:2: note: ...this ‘if’ clause, but it is not | if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) | ^~ As it turns out, this particular case is harmless (we just do some &= operations with 0), but worth fixing nonetheless. Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall --- virt/kvm/arm/pmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index b5754c6c5508..575c7aa30d7e 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -193,11 +193,12 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; - if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); reg &= kvm_pmu_valid_counter_mask(vcpu); + } return reg; } From 1c5631c73fc2261a5df64a72c155cb53dcdc0c45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Apr 2016 09:37:22 +0100 Subject: [PATCH 2/4] KVM: arm/arm64: Handle forward time correction gracefully On a host that runs NTP, corrections can have a direct impact on the background timer that we program on the behalf of a vcpu. In particular, NTP performing a forward correction will result in a timer expiring sooner than expected from a guest point of view. Not a big deal, we kick the vcpu anyway. But on wake-up, the vcpu thread is going to perform a check to find out whether or not it should block. And at that point, the timer check is going to say "timer has not expired yet, go back to sleep". This results in the timer event being lost forever. There are multiple ways to handle this. One would be record that the timer has expired and let kvm_cpu_has_pending_timer return true in that case, but that would be fairly invasive. Another is to check for the "short sleep" condition in the hrtimer callback, and restart the timer for the remaining time when the condition is detected. This patch implements the latter, with a bit of refactoring in order to avoid too much code duplication. Cc: Reported-by: Alexander Graf Reviewed-by: Alexander Graf Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 49 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index a9ad4fe3f68f..9aaa35dd9144 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -91,6 +91,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; + WARN_ON(!kvm_timer_should_fire(vcpu)); + /* * If the vcpu is blocked we want to wake it up so that it will see * the timer has expired when entering the guest. @@ -98,10 +100,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) kvm_vcpu_kick(vcpu); } +static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) +{ + cycle_t cval, now; + + cval = vcpu->arch.timer_cpu.cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + if (now < cval) { + u64 ns; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + return ns; + } + + return 0; +} + static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) { struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + timer = container_of(hrt, struct arch_timer_cpu, timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If we should have slept longer, restart it. + */ + ns = kvm_timer_compute_delta(vcpu); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + queue_work(wqueue, &timer->expired); return HRTIMER_NORESTART; } @@ -176,8 +214,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) void kvm_timer_schedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 ns; - cycle_t cval, now; BUG_ON(timer_is_armed(timer)); @@ -197,14 +233,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) return; /* The timer has not yet expired, schedule a background timer */ - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, - cval - now, - timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); + timer_arm(timer, kvm_timer_compute_delta(vcpu)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) From 6141570c36f0c937d5deff20d9cf08cbd8d8ed48 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 5 Apr 2016 16:11:47 +0100 Subject: [PATCH 3/4] arm64: KVM: Warn when PARange is less than 40 bits We always thought that 40bits of PA range would be the minimum people would actually build. Anything less is terrifyingly small. Turns out that we were both right and wrong. Nobody has ever built such a system, but the ARM Foundation Model has a PARange set to 36bits. Just because we can. Oh well. Now, the KVM API explicitely says that we offer a 40bit PA space to the VM, so we shouldn't run KVM on the Foundation Model at all. That being said, this patch offers a less agressive alternative, and loudly warns about the configuration being unsupported. You'll still be able to run VMs (at your own risks, though). This is just a workaround until we have a proper userspace API where we report the PARange to userspace. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 6 ++--- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/include/asm/kvm_host.h | 7 +++--- arch/arm64/kvm/hyp/s2-setup.c | 39 +++++++++++++++++++++++++++++-- 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 4150fd8bae01..3f29887995bc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -151,8 +151,7 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ - VTCR_EL2_RES1) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -163,8 +162,7 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ - VTCR_EL2_RES1) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index eb7490d232a0..40a0a24e6c98 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -54,7 +54,7 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -extern void __init_stage2_translation(void); +extern u32 __init_stage2_translation(void); #endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 227ed475dbd3..4cd4196e94a9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -370,11 +370,12 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */ - static inline void __cpu_init_stage2(void) { - kvm_call_hyp(__init_stage2_translation); + u32 parange = kvm_call_hyp(__init_stage2_translation); + + WARN_ONCE(parange < 40, + "PARange is %d bits, unsupported configuration!", parange); } #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index 5a9f3bf542b0..bcbe761a5a3d 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -20,9 +20,10 @@ #include #include -void __hyp_text __init_stage2_translation(void) +u32 __hyp_text __init_stage2_translation(void) { u64 val = VTCR_EL2_FLAGS; + u64 parange; u64 tmp; /* @@ -30,7 +31,39 @@ void __hyp_text __init_stage2_translation(void) * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... */ - val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16; + parange = read_sysreg(id_aa64mmfr0_el1) & 7; + val |= parange << 16; + + /* Compute the actual PARange... */ + switch (parange) { + case 0: + parange = 32; + break; + case 1: + parange = 36; + break; + case 2: + parange = 40; + break; + case 3: + parange = 42; + break; + case 4: + parange = 44; + break; + case 5: + default: + parange = 48; + break; + } + + /* + * ... and clamp it to 40 bits, unless we have some braindead + * HW that implements less than that. In all cases, we'll + * return that value for the rest of the kernel to decide what + * to do. + */ + val |= 64 - (parange > 40 ? 40 : parange); /* * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS @@ -42,4 +75,6 @@ void __hyp_text __init_stage2_translation(void) VTCR_EL2_VS_8BIT; write_sysreg(val, vtcr_el2); + + return parange; } From 06a71a24bae57a07afee9cda6b00495347d8a448 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 4 Apr 2016 14:46:51 +0100 Subject: [PATCH 4/4] arm64: KVM: unregister notifiers in hyp mode teardown path Commit 1e947bad0b63 ("arm64: KVM: Skip HYP setup when already running in HYP") re-organized the hyp init code and ended up leaving the CPU hotplug and PM notifier even if hyp mode initialization fails. Since KVM is not yet supported with ACPI, the above mentioned commit breaks CPU hotplug in ACPI boot. This patch fixes teardown_hyp_mode to properly unregister both CPU hotplug and PM notifiers in the teardown path. Fixes: 1e947bad0b63 ("arm64: KVM: Skip HYP setup when already running in HYP") Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Sudeep Holla Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index b5384311dec4..dded1b763c16 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1112,10 +1112,17 @@ static void __init hyp_cpu_pm_init(void) { cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } +static void __init hyp_cpu_pm_exit(void) +{ + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); +} #else static inline void hyp_cpu_pm_init(void) { } +static inline void hyp_cpu_pm_exit(void) +{ +} #endif static void teardown_common_resources(void) @@ -1141,9 +1148,7 @@ static int init_subsystems(void) /* * Register CPU Hotplug notifier */ - cpu_notifier_register_begin(); - err = __register_cpu_notifier(&hyp_init_cpu_nb); - cpu_notifier_register_done(); + err = register_cpu_notifier(&hyp_init_cpu_nb); if (err) { kvm_err("Cannot register KVM init CPU notifier (%d)\n", err); return err; @@ -1193,6 +1198,8 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); + unregister_cpu_notifier(&hyp_init_cpu_nb); + hyp_cpu_pm_exit(); } static int init_vhe_mode(void)