From 5177fe91e4cf78a659aada2c9cf712db4d788481 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Jan 2022 16:17:54 +0000 Subject: [PATCH 1/6] KVM: arm64: Do not change the PMU event filter after a VCPU has run Userspace can specify which events a guest is allowed to use with the KVM_ARM_VCPU_PMU_V3_FILTER attribute. The list of allowed events can be identified by a guest from reading the PMCEID{0,1}_EL0 registers. Changing the PMU event filter after a VCPU has run can cause reads of the registers performed before the filter is changed to return different values than reads performed with the new event filter in place. The architecture defines the two registers as read-only, and this behaviour contradicts that. Keep track when the first VCPU has run and deny changes to the PMU event filter to prevent this from happening. Signed-off-by: Marc Zyngier [ Alexandru E: Added commit message, updated ioctl documentation ] Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-2-alexandru.elisei@arm.com --- Documentation/virt/kvm/devices/vcpu.rst | 2 +- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 4 +++ arch/arm64/kvm/pmu-emul.c | 33 +++++++++++++++---------- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 60a29972d3f1..d063aaee5bb7 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -70,7 +70,7 @@ irqchip. -ENODEV PMUv3 not supported or GIC not initialized -ENXIO PMUv3 not properly configured or in-kernel irqchip not configured as required prior to calling this attribute - -EBUSY PMUv3 already initialized + -EBUSY PMUv3 already initialized or a VCPU has already run -EINVAL Invalid filter range ======= ====================================================== diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5bc01e62c08a..2869259e10c0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -136,6 +136,7 @@ struct kvm_arch { /* Memory Tagging Extension enabled for the guest */ bool mte_enabled; + bool ran_once; }; struct kvm_vcpu_fault_info { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ecc5958e27fe..4783dbf66df2 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -634,6 +634,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (kvm_vm_is_protected(kvm)) kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); + mutex_lock(&kvm->lock); + kvm->arch.ran_once = true; + mutex_unlock(&kvm->lock); + return ret; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fbcfd4ec6f92..bc771bc1a041 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -924,6 +924,8 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { + struct kvm *kvm = vcpu->kvm; + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; @@ -941,7 +943,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!irqchip_in_kernel(kvm)) return -EINVAL; if (get_user(irq, uaddr)) @@ -951,7 +953,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(irq) || irq_is_spi(irq))) return -EINVAL; - if (!pmu_irq_is_valid(vcpu->kvm, irq)) + if (!pmu_irq_is_valid(kvm, irq)) return -EINVAL; if (kvm_arm_pmu_irq_initialized(vcpu)) @@ -966,7 +968,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) struct kvm_pmu_event_filter filter; int nr_events; - nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; + nr_events = kvm_pmu_event_mask(kvm) + 1; uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr; @@ -978,12 +980,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) filter.action != KVM_PMU_EVENT_DENY)) return -EINVAL; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&kvm->lock); - if (!vcpu->kvm->arch.pmu_filter) { - vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); - if (!vcpu->kvm->arch.pmu_filter) { - mutex_unlock(&vcpu->kvm->lock); + if (kvm->arch.ran_once) { + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + if (!kvm->arch.pmu_filter) { + kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); + if (!kvm->arch.pmu_filter) { + mutex_unlock(&kvm->lock); return -ENOMEM; } @@ -994,17 +1001,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) * events, the default is to allow. */ if (filter.action == KVM_PMU_EVENT_ALLOW) - bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events); + bitmap_zero(kvm->arch.pmu_filter, nr_events); else - bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events); + bitmap_fill(kvm->arch.pmu_filter, nr_events); } if (filter.action == KVM_PMU_EVENT_ALLOW) - bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents); + bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents); else - bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents); + bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&kvm->lock); return 0; } From 2093057ab879da1070c851b9e07126eaa86d0dfc Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 27 Jan 2022 16:17:55 +0000 Subject: [PATCH 2/6] perf: Fix wrong name in comment for struct perf_cpu_context Commit 0793a61d4df8 ("performance counters: core code") added the perf subsystem (then called Performance Counters) to Linux, creating the struct perf_cpu_context. The comment for the struct referred to it as a "struct perf_counter_cpu_context". Commit cdd6c482c9ff ("perf: Do the big rename: Performance Counters -> Performance Events") changed the comment to refer to a "struct perf_event_cpu_context", which was still the wrong name for the struct. Change the comment to say "struct perf_cpu_context". CC: Thomas Gleixner CC: Ingo Molnar Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-3-alexandru.elisei@arm.com --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 733649184b27..af97dd427501 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -864,7 +864,7 @@ struct perf_event_context { #define PERF_NR_CONTEXTS 4 /** - * struct perf_event_cpu_context - per cpu event context structure + * struct perf_cpu_context - per cpu event context structure */ struct perf_cpu_context { struct perf_event_context ctx; From 46b18782147248b62f00e98a7f87abaf934951e8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Jan 2022 16:17:56 +0000 Subject: [PATCH 3/6] KVM: arm64: Keep a per-VM pointer to the default PMU As we are about to allow selection of the PMU exposed to a guest, start by keeping track of the default one instead of only the PMU version. Signed-off-by: Marc Zyngier Signed-off-by: Alexandru Elisei Link: https://lore.kernel.org/r/20220127161759.53553-4-alexandru.elisei@arm.com --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/pmu-emul.c | 42 +++++++++++++++++++------------ 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2869259e10c0..57141a3a3740 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -129,7 +129,7 @@ struct kvm_arch { * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+). */ unsigned long *pmu_filter; - unsigned int pmuver; + struct arm_pmu *arm_pmu; u8 pfr0_csv2; u8 pfr0_csv3; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index bc771bc1a041..b238b3d5515c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -24,7 +24,11 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); static u32 kvm_pmu_event_mask(struct kvm *kvm) { - switch (kvm->arch.pmuver) { + unsigned int pmuver; + + pmuver = kvm->arch.arm_pmu->pmuver; + + switch (pmuver) { case ID_AA64DFR0_PMUVER_8_0: return GENMASK(9, 0); case ID_AA64DFR0_PMUVER_8_1: @@ -33,7 +37,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) case ID_AA64DFR0_PMUVER_8_7: return GENMASK(15, 0); default: /* Shouldn't be here, just for sanity */ - WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver); + WARN_ONCE(1, "Unknown PMU version %d\n", pmuver); return 0; } } @@ -600,6 +604,7 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) */ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) { + struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; struct perf_event *event; @@ -636,7 +641,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) return; memset(&attr, 0, sizeof(struct perf_event_attr)); - attr.type = PERF_TYPE_RAW; + attr.type = arm_pmu->pmu.type; attr.size = sizeof(attr); attr.pinned = 1; attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); @@ -750,12 +755,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu) static_branch_enable(&kvm_arm_pmu_available); } -static int kvm_pmu_probe_pmuver(void) +static struct arm_pmu *kvm_pmu_probe_armpmu(void) { struct perf_event_attr attr = { }; struct perf_event *event; - struct arm_pmu *pmu; - int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF; + struct arm_pmu *pmu = NULL; /* * Create a dummy event that only counts user cycles. As we'll never @@ -780,19 +784,20 @@ static int kvm_pmu_probe_pmuver(void) if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", PTR_ERR(event)); - return ID_AA64DFR0_PMUVER_IMP_DEF; + return NULL; } if (event->pmu) { pmu = to_arm_pmu(event->pmu); - if (pmu->pmuver) - pmuver = pmu->pmuver; + if (pmu->pmuver == 0 || + pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) + pmu = NULL; } perf_event_disable(event); perf_event_release_kernel(event); - return pmuver; + return pmu; } u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) @@ -810,7 +815,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled * as RAZ */ - if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4) + if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4) val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32); base = 32; } @@ -932,11 +937,16 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (vcpu->arch.pmu.created) return -EBUSY; - if (!vcpu->kvm->arch.pmuver) - vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver(); - - if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) - return -ENODEV; + mutex_lock(&kvm->lock); + if (!kvm->arch.arm_pmu) { + /* No PMU set, get the default one */ + kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); + if (!kvm->arch.arm_pmu) { + mutex_unlock(&kvm->lock); + return -ENODEV; + } + } + mutex_unlock(&kvm->lock); switch (attr->attr) { case KVM_ARM_VCPU_PMU_V3_IRQ: { From db858060b1a788fba03711793dcaff19ea43286c Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 27 Jan 2022 16:17:57 +0000 Subject: [PATCH 4/6] KVM: arm64: Keep a list of probed PMUs The ARM PMU driver calls kvm_host_pmu_init() after probing to tell KVM that a hardware PMU is available for guest emulation. Heterogeneous systems can have more than one PMU present, and the callback gets called multiple times, once for each of them. Keep track of all the PMUs available to KVM, as they're going to be needed later. Reviewed-by: Reiji Watanabe Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-5-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 25 +++++++++++++++++++++++-- include/kvm/arm_pmu.h | 5 +++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index b238b3d5515c..7bab73f85b58 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,9 @@ DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); +static LIST_HEAD(arm_pmus); +static DEFINE_MUTEX(arm_pmus_lock); + static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); @@ -750,9 +754,26 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, void kvm_host_pmu_init(struct arm_pmu *pmu) { - if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF && - !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled()) + struct arm_pmu_entry *entry; + + if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || + is_protected_kvm_enabled()) + return; + + mutex_lock(&arm_pmus_lock); + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out_unlock; + + entry->arm_pmu = pmu; + list_add_tail(&entry->entry, &arm_pmus); + + if (list_is_singular(&arm_pmus)) static_branch_enable(&kvm_arm_pmu_available); + +out_unlock: + mutex_unlock(&arm_pmus_lock); } static struct arm_pmu *kvm_pmu_probe_armpmu(void) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index f9ed4c171d7b..20193416d214 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -29,6 +29,11 @@ struct kvm_pmu { struct irq_work overflow_work; }; +struct arm_pmu_entry { + struct list_head entry; + struct arm_pmu *arm_pmu; +}; + DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static __always_inline bool kvm_arm_support_pmu_v3(void) From 6ee7fca2a4a023b14aa1f1f3c4f6c833116116ef Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 27 Jan 2022 16:17:58 +0000 Subject: [PATCH 5/6] KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute When KVM creates an event and there are more than one PMUs present on the system, perf_init_event() will go through the list of available PMUs and will choose the first one that can create the event. The order of the PMUs in this list depends on the probe order, which can change under various circumstances, for example if the order of the PMU nodes change in the DTB or if asynchronous driver probing is enabled on the kernel command line (with the driver_async_probe=armv8-pmu option). Another consequence of this approach is that on heteregeneous systems all virtual machines that KVM creates will use the same PMU. This might cause unexpected behaviour for userspace: when a VCPU is executing on the physical CPU that uses this default PMU, PMU events in the guest work correctly; but when the same VCPU executes on another CPU, PMU events in the guest will suddenly stop counting. Fortunately, perf core allows user to specify on which PMU to create an event by using the perf_event_attr->type field, which is used by perf_init_event() as an index in the radix tree of available PMUs. Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU attribute to allow userspace to specify the arm_pmu that KVM will use when creating events for that VCPU. KVM will make no attempt to run the VCPU on the physical CPUs that share the PMU, leaving it up to userspace to manage the VCPU threads' affinity accordingly. To ensure that KVM doesn't expose an asymmetric system to the guest, the PMU set for one VCPU will be used by all other VCPUs. Once a VCPU has run, the PMU cannot be changed in order to avoid changing the list of available events for a VCPU, or to change the semantics of existing events. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-6-alexandru.elisei@arm.com --- Documentation/virt/kvm/devices/vcpu.rst | 28 +++++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/pmu-emul.c | 40 +++++++++++++++++++++++++ tools/arch/arm64/include/uapi/asm/kvm.h | 1 + 4 files changed, 70 insertions(+) diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index d063aaee5bb7..e8c5770590a2 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -104,6 +104,34 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it isn't strictly speaking an event. Filtering the cycle counter is possible using event 0x11 (CPU_CYCLES). +1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU +------------------------------------------ + +:Parameters: in kvm_device_attr.addr the address to an int representing the PMU + identifier. + +:Returns: + + ======= ==================================================== + -EBUSY PMUv3 already initialized, a VCPU has already run or + an event filter has already been set + -EFAULT Error accessing the PMU identifier + -ENXIO PMU not found + -ENODEV PMUv3 not supported or GIC not initialized + -ENOMEM Could not allocate memory + ======= ==================================================== + +Request that the VCPU uses the specified hardware PMU when creating guest events +for the purpose of PMU emulation. The PMU identifier can be read from the "type" +file for the desired PMU instance under /sys/devices (or, equivalent, +/sys/bus/even_source). This attribute is particularly useful on heterogeneous +systems where there are at least two CPU PMUs on the system. The PMU that is set +for one VCPU will be used by all the other VCPUs. It isn't possible to set a PMU +if a PMU event filter is already present. + +Note that KVM will not make any attempts to run the VCPU on the physical CPUs +associated with the PMU specified by this attribute. This is entirely left to +userspace. 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL ================================= diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..1d0a0a2a9711 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 #define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 7bab73f85b58..18361f367495 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -948,6 +948,36 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) return true; } +static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) +{ + struct kvm *kvm = vcpu->kvm; + struct arm_pmu_entry *entry; + struct arm_pmu *arm_pmu; + int ret = -ENXIO; + + mutex_lock(&kvm->lock); + mutex_lock(&arm_pmus_lock); + + list_for_each_entry(entry, &arm_pmus, entry) { + arm_pmu = entry->arm_pmu; + if (arm_pmu->pmu.type == pmu_id) { + if (kvm->arch.ran_once || + (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) { + ret = -EBUSY; + break; + } + + kvm->arch.arm_pmu = arm_pmu; + ret = 0; + break; + } + } + + mutex_unlock(&arm_pmus_lock); + mutex_unlock(&kvm->lock); + return ret; +} + int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { struct kvm *kvm = vcpu->kvm; @@ -1046,6 +1076,15 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return 0; } + case KVM_ARM_VCPU_PMU_V3_SET_PMU: { + int __user *uaddr = (int __user *)(long)attr->addr; + int pmu_id; + + if (get_user(pmu_id, uaddr)) + return -EFAULT; + + return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id); + } case KVM_ARM_VCPU_PMU_V3_INIT: return kvm_arm_pmu_v3_init(vcpu); } @@ -1083,6 +1122,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_IRQ: case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: + case KVM_ARM_VCPU_PMU_V3_SET_PMU: if (kvm_vcpu_has_pmu(vcpu)) return 0; } diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..1d0a0a2a9711 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags { #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 #define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 From 583cda1b0e7d5d49db5fc15db623166310e36bf6 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 27 Jan 2022 16:17:59 +0000 Subject: [PATCH 6/6] KVM: arm64: Refuse to run VCPU if the PMU doesn't match the physical CPU Userspace can assign a PMU to a VCPU with the KVM_ARM_VCPU_PMU_V3_SET_PMU device ioctl. If the VCPU is scheduled on a physical CPU which has a different PMU, the perf events needed to emulate a guest PMU won't be scheduled in and the guest performance counters will stop counting. Treat it as an userspace error and refuse to run the VCPU in this situation. Suggested-by: Marc Zyngier Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127161759.53553-7-alexandru.elisei@arm.com --- Documentation/virt/kvm/devices/vcpu.rst | 6 +++++- arch/arm64/include/asm/kvm_host.h | 12 ++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 3 +++ arch/arm64/kvm/arm.c | 17 +++++++++++++++++ arch/arm64/kvm/pmu-emul.c | 1 + 5 files changed, 38 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index e8c5770590a2..260db203a1e2 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -131,7 +131,11 @@ if a PMU event filter is already present. Note that KVM will not make any attempts to run the VCPU on the physical CPUs associated with the PMU specified by this attribute. This is entirely left to -userspace. +userspace. However, attempting to run the VCPU on a physical CPU not supported +by the PMU will fail and KVM_RUN will return with +exit_reason = KVM_EXIT_FAIL_ENTRY and populate the fail_entry struct by setting +hardare_entry_failure_reason field to KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED and +the cpu field to the processor id. 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL ================================= diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 57141a3a3740..0bed0e33e1ae 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -131,6 +131,8 @@ struct kvm_arch { unsigned long *pmu_filter; struct arm_pmu *arm_pmu; + cpumask_var_t supported_cpus; + u8 pfr0_csv2; u8 pfr0_csv3; @@ -436,6 +438,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */ #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ #define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) +#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_USE_SW_BP | \ @@ -454,6 +457,15 @@ struct kvm_vcpu_arch { #define vcpu_has_ptrauth(vcpu) false #endif +#define vcpu_on_unsupported_cpu(vcpu) \ + ((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU) + +#define vcpu_set_on_unsupported_cpu(vcpu) \ + ((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU) + +#define vcpu_clear_on_unsupported_cpu(vcpu) \ + ((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU) + #define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) /* diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 1d0a0a2a9711..d49f714f48e6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -414,6 +414,9 @@ struct kvm_arm_copy_mte_tags { #define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS #define KVM_PSCI_RET_DENIED PSCI_RET_DENIED +/* run->fail_entry.hardware_entry_failure_reason codes. */ +#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4783dbf66df2..13c1318d8b9a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -150,6 +150,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) + goto out_free_stage2_pgd; + cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); + kvm_vgic_early_init(kvm); /* The maximum number of VCPUs is limited by the host's GIC model */ @@ -176,6 +180,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) void kvm_arch_destroy_vm(struct kvm *kvm) { bitmap_free(kvm->arch.pmu_filter); + free_cpumask_var(kvm->arch.supported_cpus); kvm_vgic_destroy(kvm); @@ -411,6 +416,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_disable(vcpu); kvm_arch_vcpu_load_debug_state_flags(vcpu); + + if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus)) + vcpu_set_on_unsupported_cpu(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -423,6 +431,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); + vcpu_clear_on_unsupported_cpu(vcpu); vcpu->cpu = -1; } @@ -796,6 +805,14 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) } } + if (unlikely(vcpu_on_unsupported_cpu(vcpu))) { + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED; + run->fail_entry.cpu = smp_processor_id(); + *ret = 0; + return true; + } + return kvm_request_pending(vcpu) || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || xfer_to_guest_mode_work_pending(); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 18361f367495..4526a5824dac 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -968,6 +968,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) } kvm->arch.arm_pmu = arm_pmu; + cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus); ret = 0; break; }