linux/arch/loongarch/kvm/timer.c
Huacai Chen 73adbd92f3 LoongArch: KVM: Mark hrtimer to expire in hard interrupt context
Like commit 2c0d278f32 ("KVM: LAPIC: Mark hrtimer to expire in hard
interrupt context") and commit 9090825fa9 ("KVM: arm/arm64: Let the
timer expire in hardirq context on RT"): on PREEMPT_RT enabled kernels,
unmarked hrtimers are moved into soft interrupt expiry mode by default.
The timers are then canceled from a preempt-notifier, which is invoked
with preemption disabled, and that is not allowed on PREEMPT_RT.

The timer callback is short, so it can be invoked in hard-IRQ context.
So let the timer expire in hard-IRQ context even on -RT.

This fixes a "scheduling while atomic" bug for PREEMPT_RT enabled kernels:

 BUG: scheduling while atomic: qemu-system-loo/1011/0x00000002
 Modules linked in: amdgpu rfkill nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat ns
 CPU: 1 UID: 0 PID: 1011 Comm: qemu-system-loo Tainted: G        W          6.12.0-rc2+ #1774
 Tainted: [W]=WARN
 Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.0-prebeta9 10/21/2022
 Stack : ffffffffffffffff 0000000000000000 9000000004e3ea38 9000000116744000
         90000001167475a0 0000000000000000 90000001167475a8 9000000005644830
         90000000058dc000 90000000058dbff8 9000000116747420 0000000000000001
         0000000000000001 6a613fc938313980 000000000790c000 90000001001c1140
         00000000000003fe 0000000000000001 000000000000000d 0000000000000003
         0000000000000030 00000000000003f3 000000000790c000 9000000116747830
         90000000057ef000 0000000000000000 9000000005644830 0000000000000004
         0000000000000000 90000000057f4b58 0000000000000001 9000000116747868
         900000000451b600 9000000005644830 9000000003a13998 0000000010000020
         00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d
         ...
 Call Trace:
 [<9000000003a13998>] show_stack+0x38/0x180
 [<9000000004e3ea34>] dump_stack_lvl+0x84/0xc0
 [<9000000003a71708>] __schedule_bug+0x48/0x60
 [<9000000004e45734>] __schedule+0x1114/0x1660
 [<9000000004e46040>] schedule_rtlock+0x20/0x60
 [<9000000004e4e330>] rtlock_slowlock_locked+0x3f0/0x10a0
 [<9000000004e4f038>] rt_spin_lock+0x58/0x80
 [<9000000003b02d68>] hrtimer_cancel_wait_running+0x68/0xc0
 [<9000000003b02e30>] hrtimer_cancel+0x70/0x80
 [<ffff80000235eb70>] kvm_restore_timer+0x50/0x1a0 [kvm]
 [<ffff8000023616c8>] kvm_arch_vcpu_load+0x68/0x2a0 [kvm]
 [<ffff80000234c2d4>] kvm_sched_in+0x34/0x60 [kvm]
 [<9000000003a749a0>] finish_task_switch.isra.0+0x140/0x2e0
 [<9000000004e44a70>] __schedule+0x450/0x1660
 [<9000000004e45cb0>] schedule+0x30/0x180
 [<ffff800002354c70>] kvm_vcpu_block+0x70/0x120 [kvm]
 [<ffff800002354d80>] kvm_vcpu_halt+0x60/0x3e0 [kvm]
 [<ffff80000235b194>] kvm_handle_gspr+0x3f4/0x4e0 [kvm]
 [<ffff80000235f548>] kvm_handle_exit+0x1c8/0x260 [kvm]

Reviewed-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2024-10-23 22:15:44 +08:00

192 lines
5.0 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020-2023 Loongson Technology Corporation Limited
*/
#include <linux/kvm_host.h>
#include <asm/kvm_csr.h>
#include <asm/kvm_vcpu.h>
/*
 * ktime_to_tick() - Scale a ktime_t value to guest timer ticks.
 * @vcpu: vCPU whose arch.timer_mhz scaling factor is used.
 * @now:  time value to convert; the callers in this file pass the
 *        difference between two ktime_t values rather than an absolute time.
 *
 * Converts @now to nanoseconds, multiplies by vcpu->arch.timer_mhz and
 * divides by MNSEC_PER_SEC.
 *
 * Return: the scaled tick count.
 */
static inline u64 ktime_to_tick(struct kvm_vcpu *vcpu, ktime_t now)
{
	u64 nsecs = ktime_to_ns(now);

	return div_u64(nsecs * vcpu->arch.timer_mhz, MNSEC_PER_SEC);
}
/*
 * tick_to_ns() - Scale a guest timer tick count to nanoseconds.
 * @vcpu: vCPU whose arch.timer_mhz scaling factor is used.
 * @tick: number of timer ticks to convert.
 *
 * Inverse scaling of ktime_to_tick(): multiplies @tick by MNSEC_PER_SEC and
 * divides by vcpu->arch.timer_mhz.
 *
 * Return: the scaled nanosecond count.
 */
static inline u64 tick_to_ns(struct kvm_vcpu *vcpu, u64 tick)
{
	u64 scaled = tick * MNSEC_PER_SEC;

	return div_u64(scaled, vcpu->arch.timer_mhz);
}
/*
 * kvm_swtimer_wakeup() - hrtimer callback for the vCPU's soft timer.
 * @timer: the expired hrtimer, embedded in a vCPU as arch.swtimer.
 *
 * Queues the guest timer interrupt (INT_TI) on the owning vCPU and wakes up
 * whoever is waiting on vcpu->wait. The timer is not re-armed from here.
 *
 * Return: always HRTIMER_NORESTART.
 */
enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
{
	struct kvm_vcpu *owner = container_of(timer, struct kvm_vcpu, arch.swtimer);

	kvm_queue_irq(owner, INT_TI);
	rcuwait_wake_up(&owner->wait);

	return HRTIMER_NORESTART;
}
/*
 * kvm_init_timer() - Set up the vCPU timer scaling factor and zero the timer.
 * @vcpu:     vCPU to initialise.
 * @timer_hz: timer frequency in Hz.
 *
 * Stores @timer_hz >> 20 in vcpu->arch.timer_mhz, the factor used by
 * ktime_to_tick() and tick_to_ns(), and resets the software copy of the
 * guest TVAL CSR to 0.
 */
void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz)
{
	unsigned long scaled_freq = timer_hz >> 20;

	vcpu->arch.timer_mhz = scaled_freq;

	/* The guest timer value starts out at 0 */
	kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TVAL, 0);
}
/*
 * kvm_restore_timer() - Restore guest timer state from the saved context.
 * @vcpu: vCPU whose timer state is being restored.
 *
 * Reloads the guest ESTAT and TCFG CSRs from the software copy, stops the
 * soft hrtimer if the vCPU was blocking, and then programs the guest timer
 * tick value so that it matches the expiry time recorded in
 * vcpu->arch.expire when the timer was saved.
 *
 * Three cases are handled once the timer is known to be enabled:
 *  - one-shot timer that had already fired when it was saved (TVAL > TCFG),
 *  - timer that has not expired yet (remaining ticks are written),
 *  - timer that expired while saved (periodic: re-phase and queue INT_TI;
 *    one-shot: delta stays 0 and 0 is written to the tick register).
 */
void kvm_restore_timer(struct kvm_vcpu *vcpu)
{
unsigned long cfg, estat;
unsigned long ticks, delta, period;
ktime_t expire, now;
struct loongarch_csrs *csr = vcpu->arch.csr;
/*
 * Set guest stable timer cfg csr.
 * Disable the timer before restoring the ESTAT CSR, to avoid getting an
 * invalid timer interrupt for the old timer cfg.
 */
cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
write_gcsr_timercfg(0);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
if (!(cfg & CSR_TCFG_EN)) {
/* Guest timer is disabled, just restore timer registers */
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
return;
}
/*
 * Freeze the soft-timer and sync the guest stable timer with it.
 * The soft timer is only started when the vCPU is blocking (see
 * _kvm_save_timer()), so it only needs cancelling in that case.
 */
if (kvm_vcpu_is_blocking(vcpu))
hrtimer_cancel(&vcpu->arch.swtimer);
/*
 * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
 * If a oneshot timer has fired, CSR TVAL will be -1. There are two
 * conditions:
 *  1) the timer fired while exiting to the host
 *  2) the timer fired and the VM was handling the timer irq, and then
 *     exited to the host. The host should not inject the timer irq again,
 *     to avoid a spurious timer interrupt.
 *
 * A fired oneshot timer is detected by the saved TVAL being larger than
 * the saved TCFG.
 */
ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
estat = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
if (!(cfg & CSR_TCFG_PERIOD) && (ticks > cfg)) {
/*
 * Writing 0 to LOONGARCH_CSR_TVAL will inject the timer irq
 * and set CSR TVAL to -1.
 */
write_gcsr_timertick(0);
/*
 * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear the
 * timer interrupt while CSR TVAL stays at -1. This is only done
 * when the saved ESTAT did not have the timer interrupt pending,
 * which avoids a spurious timer interrupt.
 */
if (!(estat & CPU_TIMER))
gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
return;
}
/*
 * Set the remaining tick value if the timer has not expired yet.
 * delta defaults to 0, which is what gets written for a oneshot timer
 * whose expiry time has already passed.
 */
delta = 0;
now = ktime_get();
expire = vcpu->arch.expire;
if (ktime_before(now, expire))
delta = ktime_to_tick(vcpu, ktime_sub(expire, now));
else if (cfg & CSR_TCFG_PERIOD) {
/*
 * Periodic timer that expired while saved: compute how far into the
 * current period we are and program the ticks left until the next
 * period boundary.
 *
 * NOTE(review): if the guest programmed a periodic timer with a zero
 * initial value, period would be 0 and the modulo below would divide
 * by zero - confirm whether this is prevented elsewhere.
 */
period = cfg & CSR_TCFG_VAL;
delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
delta = period - (delta % period);
/*
 * Inject the timer irq here even though the sw timer should have
 * injected it asynchronously already, since the sw timer may have
 * been cancelled while the async injection was in progress.
 */
kvm_queue_irq(vcpu, INT_TI);
}
write_gcsr_timertick(delta);
}
/*
 * _kvm_save_timer() - Record the guest timer expiry and arm the soft timer.
 * @vcpu: vCPU whose enabled guest timer is being saved.
 *
 * Switches to software emulation of the guest timer. The hard timer must
 * already be in use, so preemption should be disabled by the caller.
 *
 * The absolute expiry time is derived from the saved TVAL and stored in
 * vcpu->arch.expire. If the vCPU is blocking, an hrtimer is started for that
 * expiry time so the timer interrupt can still be delivered.
 */
static void _kvm_save_timer(struct kvm_vcpu *vcpu)
{
	struct loongarch_csrs *csr = vcpu->arch.csr;
	unsigned long tcfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
	unsigned long tval = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
	unsigned long remain_ns;
	ktime_t deadline;

	/*
	 * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
	 * When a periodic timer fires, CSR TVAL is reloaded from CSR TCFG.
	 * When a oneshot timer fires, CSR TVAL becomes -1.
	 * A fired oneshot timer is therefore recognised by TVAL not being
	 * smaller than TCFG, in which case there is no time left to wait.
	 */
	remain_ns = (tval < tcfg) ? tick_to_ns(vcpu, tval) : 0;

	deadline = ktime_add_ns(ktime_get(), remain_ns);
	vcpu->arch.expire = deadline;

	/* The soft timer is only needed while the vCPU is blocking */
	if (!kvm_vcpu_is_blocking(vcpu))
		return;

	/*
	 * HRTIMER_MODE_PINNED_HARD is suggested since the vcpu may run on
	 * the same physical cpu next time, and the timer should run in
	 * hardirq context even in the PREEMPT_RT case.
	 */
	hrtimer_start(&vcpu->arch.swtimer, deadline, HRTIMER_MODE_ABS_PINNED_HARD);
}
/*
 * kvm_save_timer() - Save guest timer state into the vCPU context.
 * @vcpu: vCPU whose timer state is being saved.
 *
 * Copies the hardware guest TCFG and TVAL CSRs into the software copy and,
 * if the guest timer is enabled, switches to the soft guest timer via
 * _kvm_save_timer(). The guest ESTAT CSR is saved last. The whole sequence
 * runs with preemption disabled.
 */
void kvm_save_timer(struct kvm_vcpu *vcpu)
{
	struct loongarch_csrs *sw_csr = vcpu->arch.csr;
	unsigned long saved_cfg;

	preempt_disable();

	/* Snapshot the hard timer registers */
	kvm_save_hw_gcsr(sw_csr, LOONGARCH_CSR_TCFG);
	kvm_save_hw_gcsr(sw_csr, LOONGARCH_CSR_TVAL);

	/* Only an enabled timer needs an expiry time / soft timer */
	saved_cfg = kvm_read_sw_gcsr(sw_csr, LOONGARCH_CSR_TCFG);
	if (saved_cfg & CSR_TCFG_EN)
		_kvm_save_timer(vcpu);

	/* Save timer-related interrupt state to the vCPU context */
	kvm_save_hw_gcsr(sw_csr, LOONGARCH_CSR_ESTAT);

	preempt_enable();
}