From 66b030e48af68fd4c22d343908bc057207a0a31e Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 15 Dec 2014 18:43:32 +0100 Subject: [PATCH 01/36] KVM: arm/arm64: vgic: vgic_init returns -ENODEV when no online vcpu To be more explicit on vgic initialization failure, -ENODEV is returned by vgic_init when no online vcpus can be found at init. Signed-off-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 03affc7bf453..b8d57e86cfdc 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1809,7 +1809,7 @@ static int vgic_init(struct kvm *kvm) nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); if (!nr_cpus) /* No vcpus? Can't be good... */ - return -EINVAL; + return -ENODEV; /* * If nobody configured the number of interrupts, use the From 065c0034823b513d3ca95760a2ad1765e3ef629c Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 15 Dec 2014 18:43:33 +0100 Subject: [PATCH 02/36] KVM: arm/arm64: vgic: add init entry to VGIC KVM device Since the advent of VGIC dynamic initialization, this latter is initialized quite late on the first vcpu run or "on-demand", when injecting an IRQ or when the guest sets its registers. This initialization could be initiated explicitly much earlier by the users-space, as soon as it has provided the requested dimensioning parameters. This patch adds a new entry to the VGIC KVM device that allows the user to manually request the VGIC init: - a new KVM_DEV_ARM_VGIC_GRP_CTRL group is introduced. - Its first attribute is KVM_DEV_ARM_VGIC_CTRL_INIT The rationale behind introducing a group is to be able to add other controls later on, if needed. Signed-off-by: Eric Auger Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/devices/arm-vgic.txt | 11 +++++++++++ arch/arm/include/uapi/asm/kvm.h | 2 ++ arch/arm64/include/uapi/asm/kvm.h | 2 ++ virt/kvm/arm/vgic.c | 14 +++++++++++++- 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index df8b0c7540b6..30f5427cf9b0 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -81,3 +81,14 @@ Groups: -EINVAL: Value set is out of the expected range -EBUSY: Value has already be set, or GIC has already been initialized with default values. + + KVM_DEV_ARM_VGIC_GRP_CTRL + Attributes: + KVM_DEV_ARM_VGIC_CTRL_INIT + request the initialization of the VGIC, no additional parameter in + kvm_device_attr.addr. + Errors: + -ENXIO: VGIC not properly configured as required prior to calling + this attribute + -ENODEV: no online VCPU + -ENOMEM: memory shortage when allocating vgic internal data diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 09ee408c1a67..0db25bc32864 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -175,6 +175,8 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 +#define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 8e38878c87c6..480af3461068 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -161,6 +161,8 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 +#define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b8d57e86cfdc..c84f53dfcd62 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2302,7 +2302,14 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return ret; } - + case KVM_DEV_ARM_VGIC_GRP_CTRL: { + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + r = vgic_init(dev->kvm); + return r; + } + break; + } } return -ENXIO; @@ -2381,6 +2388,11 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return vgic_has_attr_regs(vgic_cpu_ranges, offset); case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: return 0; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + } } return -ENXIO; } From 0d97f884810410b2e0515e29424fe1ec5357176f Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 12 Jan 2015 11:53:36 -0500 Subject: [PATCH 03/36] arm/arm64: KVM: add tracing support for arm64 exit handler arm64 uses its own copy of exit handler (arm64/kvm/handle_exit.c). Currently this file doesn't hook up with any trace points. As a result users might not see certain events (e.g. HVC & WFI) while using ftrace with arm64 KVM. This patch fixes this issue by adding a new trace file and defining two trace events (one of which is shared by wfi and wfe) for arm64. The new trace points are then linked with related functions in handle_exit.c. Signed-off-by: Wei Huang Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_arm.h | 2 + arch/arm64/include/asm/kvm_emulate.h | 5 +++ arch/arm64/kvm/handle_exit.c | 13 ++++++- arch/arm64/kvm/trace.h | 55 ++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/kvm/trace.h diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8afb863f5a9e..3da2d3acec0b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -257,4 +257,6 @@ #define ESR_EL2_EC_WFI_ISS_WFE (1 << 0) +#define ESR_EL2_HVC_IMM_MASK ((1UL << 16) - 1) + #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 8127e45e2637..a6fa2d2cd41c 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -126,6 +126,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; } +static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_HVC_IMM_MASK; +} + static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 34b8bd0711e9..6a7eb3ce7027 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -26,12 +26,18 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; + trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), + kvm_vcpu_hvc_get_imm(vcpu)); + ret = kvm_psci_call(vcpu); if (ret < 0) { kvm_inject_undefined(vcpu); @@ -61,10 +67,13 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) */ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) + if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) { + trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); kvm_vcpu_on_spin(vcpu); - else + } else { + trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); kvm_vcpu_block(vcpu); + } kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h new file mode 100644 index 000000000000..157416e963f2 --- /dev/null +++ b/arch/arm64/kvm/trace.h @@ -0,0 +1,55 @@ +#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ARM64_KVM_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(kvm_wfx_arm64, + TP_PROTO(unsigned long vcpu_pc, bool is_wfe), + TP_ARGS(vcpu_pc, is_wfe), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(bool, is_wfe) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->is_wfe = is_wfe; + ), + + TP_printk("guest executed wf%c at: 0x%08lx", + __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_hvc_arm64, + TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), + TP_ARGS(vcpu_pc, r0, imm), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(unsigned long, r0) + __field(unsigned long, imm) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->r0 = r0; + __entry->imm = imm; + ), + + TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", + __entry->vcpu_pc, __entry->r0, __entry->imm) +); + +#endif /* _TRACE_ARM64_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include From 924de80db9cab3e2ee53933deee7945e81808151 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 12 Jan 2015 16:56:16 +0000 Subject: [PATCH 04/36] ARM: KVM: extend WFI tracepoint to differentiate between wfi and wfe Currently the trace printk talks about "wfi" only, though the trace point triggers both on wfi and wfe traps. Add a parameter to differentiate between the two. Signed-off-by: Andre Przywara Reviewed-by: Wei Huang Signed-off-by: Christoffer Dall --- arch/arm/kvm/handle_exit.c | 8 +++++--- arch/arm/kvm/trace.h | 11 +++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index a96a8043277c..95f12b2ccdcb 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -87,11 +87,13 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) */ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { - trace_kvm_wfi(*vcpu_pc(vcpu)); - if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { + trace_kvm_wfx(*vcpu_pc(vcpu), true); kvm_vcpu_on_spin(vcpu); - else + } else { + trace_kvm_wfx(*vcpu_pc(vcpu), false); kvm_vcpu_block(vcpu); + } kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index b1d640f78623..f741449121f3 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -140,19 +140,22 @@ TRACE_EVENT(kvm_emulate_cp15_imp, __entry->CRm, __entry->Op2) ); -TRACE_EVENT(kvm_wfi, - TP_PROTO(unsigned long vcpu_pc), - TP_ARGS(vcpu_pc), +TRACE_EVENT(kvm_wfx, + TP_PROTO(unsigned long vcpu_pc, bool is_wfe), + TP_ARGS(vcpu_pc, is_wfe), TP_STRUCT__entry( __field( unsigned long, vcpu_pc ) + __field( bool, is_wfe ) ), TP_fast_assign( __entry->vcpu_pc = vcpu_pc; + __entry->is_wfe = is_wfe; ), - TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc) + TP_printk("guest executed wf%c at: 0x%08lx", + __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) ); TRACE_EVENT(kvm_unmap_hva, From a6d5101661c88d642b1fc85657fb0c58da821aa7 Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:58:52 -0800 Subject: [PATCH 05/36] KVM: Add architecture-defined TLB flush support Allow architectures to override the generic kvm_flush_remote_tlbs() function via HAVE_KVM_ARCH_TLB_FLUSH_ALL. ARMv7 will need this to provide its own TLB flush interface. Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Reviewed-by: Paolo Bonzini Signed-off-by: Mario Smarduch --- virt/kvm/Kconfig | 3 +++ virt/kvm/kvm_main.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index fc0c5e603eb4..3796a2132a06 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -37,3 +37,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT config KVM_VFIO bool + +config HAVE_KVM_ARCH_TLB_FLUSH_ALL + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 167e8c14b143..d03bd2255801 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -176,6 +176,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) return called; } +#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL void kvm_flush_remote_tlbs(struct kvm *kvm) { long dirty_count = kvm->tlbs_dirty; @@ -186,6 +187,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); +#endif void kvm_reload_remote_mmus(struct kvm *kvm) { From ba0513b5b8ffbcb0cc89e2f172c0bcb70497ba2e Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:58:53 -0800 Subject: [PATCH 06/36] KVM: Add generic support for dirty page logging kvm_get_dirty_log() provides generic handling of dirty bitmap, currently reused by several architectures. Building on that we intrdoduce kvm_get_dirty_log_protect() adding write protection to mark these pages dirty for future write access, before next KVM_GET_DIRTY_LOG ioctl call from user space. Reviewed-by: Christoffer Dall Signed-off-by: Mario Smarduch --- include/linux/kvm_host.h | 9 +++++ virt/kvm/Kconfig | 6 +++ virt/kvm/kvm_main.c | 80 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 26f106022c88..3b934cc94cc8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -611,6 +611,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty); + +int kvm_get_dirty_log_protect(struct kvm *kvm, + struct kvm_dirty_log *log, bool *is_dirty); + +void kvm_arch_mmu_write_protect_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, + unsigned long mask); + int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 3796a2132a06..314950c51d9f 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -40,3 +40,9 @@ config KVM_VFIO config HAVE_KVM_ARCH_TLB_FLUSH_ALL bool + +config HAVE_KVM_ARCH_DIRTY_LOG_PROTECT + bool + +config KVM_GENERIC_DIRTYLOG_READ_PROTECT + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d03bd2255801..246cf291c6fd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -995,6 +995,86 @@ out: } EXPORT_SYMBOL_GPL(kvm_get_dirty_log); +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT +/** + * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages + * are dirty write protect them for next write. + * @kvm: pointer to kvm instance + * @log: slot id and address to which we copy the log + * @is_dirty: flag set if any page is dirty + * + * We need to keep it in mind that VCPU threads can write to the bitmap + * concurrently. So, to avoid losing track of dirty pages we keep the + * following order: + * + * 1. Take a snapshot of the bit and clear it if needed. + * 2. Write protect the corresponding page. + * 3. Copy the snapshot to the userspace. + * 4. Upon return caller flushes TLB's if needed. + * + * Between 2 and 4, the guest may write to the page using the remaining TLB + * entry. This is not a problem because the page is reported dirty using + * the snapshot taken before and step 4 ensures that writes done after + * exiting to userspace will be logged for the next call. + * + */ +int kvm_get_dirty_log_protect(struct kvm *kvm, + struct kvm_dirty_log *log, bool *is_dirty) +{ + struct kvm_memory_slot *memslot; + int r, i; + unsigned long n; + unsigned long *dirty_bitmap; + unsigned long *dirty_bitmap_buffer; + + r = -EINVAL; + if (log->slot >= KVM_USER_MEM_SLOTS) + goto out; + + memslot = id_to_memslot(kvm->memslots, log->slot); + + dirty_bitmap = memslot->dirty_bitmap; + r = -ENOENT; + if (!dirty_bitmap) + goto out; + + n = kvm_dirty_bitmap_bytes(memslot); + + dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long); + memset(dirty_bitmap_buffer, 0, n); + + spin_lock(&kvm->mmu_lock); + *is_dirty = false; + for (i = 0; i < n / sizeof(long); i++) { + unsigned long mask; + gfn_t offset; + + if (!dirty_bitmap[i]) + continue; + + *is_dirty = true; + + mask = xchg(&dirty_bitmap[i], 0); + dirty_bitmap_buffer[i] = mask; + + offset = i * BITS_PER_LONG; + kvm_arch_mmu_write_protect_pt_masked(kvm, memslot, offset, + mask); + } + + spin_unlock(&kvm->mmu_lock); + + r = -EFAULT; + if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) + goto out; + + r = 0; +out: + return r; +} +EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); +#endif + bool kvm_largepages_enabled(void) { return largepages_enabled; From e108ff2f8033a417ee3e517d9f8730f665646076 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 15 Jan 2015 15:58:54 -0800 Subject: [PATCH 07/36] KVM: x86: switch to kvm_get_dirty_log_protect We now have a generic function that does most of the work of kvm_vm_ioctl_get_dirty_log, now use it. Acked-by: Christoffer Dall Signed-off-by: Mario Smarduch --- arch/x86/include/asm/kvm_host.h | 3 -- arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/mmu.c | 4 +- arch/x86/kvm/x86.c | 72 ++++++--------------------------- 4 files changed, 16 insertions(+), 64 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cb19d05af3cd..3ceddf41ca74 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -821,9 +821,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); -void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, - struct kvm_memory_slot *slot, - gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f9d16ff56c6b..d07359466d5d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -39,6 +39,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a0985ebb5512..3aa46aaa8cb3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1203,7 +1203,7 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, } /** - * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages + * kvm_arch_mmu_write_protect_pt_masked - write protect selected PT level pages * @kvm: kvm instance * @slot: slot to protect * @gfn_offset: start of the BITS_PER_LONG pages we care about @@ -1212,7 +1212,7 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, * Used when we do not need to care about huge page mappings: e.g. during dirty * logging we do not have any such mappings. */ -void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, +void kvm_arch_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 49ecda7ca958..556dfb4efc43 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3748,83 +3748,37 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, * @kvm: kvm instance * @log: slot id and address to which we copy the log * - * We need to keep it in mind that VCPU threads can write to the bitmap - * concurrently. So, to avoid losing data, we keep the following order for - * each bit: + * Steps 1-4 below provide general overview of dirty page logging. See + * kvm_get_dirty_log_protect() function description for additional details. + * + * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we + * always flush the TLB (step 4) even if previous step failed and the dirty + * bitmap may be corrupt. Regardless of previous outcome the KVM logging API + * does not preclude user space subsequent dirty log read. Flushing TLB ensures + * writes will be marked dirty for next log read. * * 1. Take a snapshot of the bit and clear it if needed. * 2. Write protect the corresponding page. - * 3. Flush TLB's if needed. - * 4. Copy the snapshot to the userspace. - * - * Between 2 and 3, the guest may write to the page using the remaining TLB - * entry. This is not a problem because the page will be reported dirty at - * step 4 using the snapshot taken before and step 3 ensures that successive - * writes will be logged for the next call. + * 3. Copy the snapshot to the userspace. + * 4. Flush TLB's if needed. */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - int r; - struct kvm_memory_slot *memslot; - unsigned long n, i; - unsigned long *dirty_bitmap; - unsigned long *dirty_bitmap_buffer; bool is_dirty = false; + int r; mutex_lock(&kvm->slots_lock); - r = -EINVAL; - if (log->slot >= KVM_USER_MEM_SLOTS) - goto out; - - memslot = id_to_memslot(kvm->memslots, log->slot); - - dirty_bitmap = memslot->dirty_bitmap; - r = -ENOENT; - if (!dirty_bitmap) - goto out; - - n = kvm_dirty_bitmap_bytes(memslot); - - dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long); - memset(dirty_bitmap_buffer, 0, n); - - spin_lock(&kvm->mmu_lock); - - for (i = 0; i < n / sizeof(long); i++) { - unsigned long mask; - gfn_t offset; - - if (!dirty_bitmap[i]) - continue; - - is_dirty = true; - - mask = xchg(&dirty_bitmap[i], 0); - dirty_bitmap_buffer[i] = mask; - - offset = i * BITS_PER_LONG; - kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask); - } - - spin_unlock(&kvm->mmu_lock); - - /* See the comments in kvm_mmu_slot_remove_write_access(). */ - lockdep_assert_held(&kvm->slots_lock); + r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); /* * All the TLBs can be flushed out of mmu lock, see the comments in * kvm_mmu_slot_remove_write_access(). */ + lockdep_assert_held(&kvm->slots_lock); if (is_dirty) kvm_flush_remote_tlbs(kvm); - r = -EFAULT; - if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) - goto out; - - r = 0; -out: mutex_unlock(&kvm->slots_lock); return r; } From 72fc36b600f73e37557576af80c58836dcf389f9 Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:58:55 -0800 Subject: [PATCH 08/36] KVM: arm: Add ARMv7 API to flush TLBs This patch adds ARMv7 architecture TLB Flush function. Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Mario Smarduch --- arch/arm/include/asm/kvm_asm.h | 1 + arch/arm/include/asm/kvm_host.h | 12 ++++++++++++ arch/arm/kvm/Kconfig | 1 + arch/arm/kvm/interrupts.S | 11 +++++++++++ 4 files changed, 25 insertions(+) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 3a67bec72d0c..25410b2d8bc1 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -96,6 +96,7 @@ extern char __kvm_hyp_code_end[]; extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); #endif diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 254e0650e48b..acfced36986b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -221,6 +221,18 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } +/** + * kvm_flush_remote_tlbs() - flush all VM TLB entries + * @kvm: pointer to kvm structure. + * + * Interface to HYP function to flush all VM TLB entries without address + * parameter. + */ +static inline void kvm_flush_remote_tlbs(struct kvm *kvm) +{ + kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); +} + static inline int kvm_arch_dev_ioctl_check_extension(long ext) { return 0; diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd299b1a8..f27f3360e52b 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST depends on ARM_VIRT_EXT && ARM_LPAE diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 01dcb0e752d9..79caf79b304a 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -66,6 +66,17 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) bx lr ENDPROC(__kvm_tlb_flush_vmid_ipa) +/** + * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs + * + * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address + * parameter + */ + +ENTRY(__kvm_tlb_flush_vmid) + b __kvm_tlb_flush_vmid_ipa +ENDPROC(__kvm_tlb_flush_vmid) + /******************************************************************** * Flush TLBs and instruction caches of all CPUs inside the inner-shareable * domain, for all VMIDs From c64735554c0aad2dc83c737ecf4c32075bed57a4 Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:58:56 -0800 Subject: [PATCH 09/36] KVM: arm: Add initial dirty page locking support Add support for initial write protection of VM memslots. This patch series assumes that huge PUDs will not be used in 2nd stage tables, which is always valid on ARMv7 Acked-by: Christoffer Dall Signed-off-by: Mario Smarduch --- arch/arm/include/asm/kvm_host.h | 2 + arch/arm/include/asm/kvm_mmu.h | 21 ++++ arch/arm/include/asm/pgtable-3level.h | 1 + arch/arm/kvm/mmu.c | 135 ++++++++++++++++++++++++++ 4 files changed, 159 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index acfced36986b..38931d4f4ba3 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -246,6 +246,8 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); + static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 63e0ecc04901..2672cf84afd1 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -114,6 +114,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= L_PMD_S2_RDWR; } +static inline void kvm_set_s2pte_readonly(pte_t *pte) +{ + pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; +} + +static inline bool kvm_s2pte_readonly(pte_t *pte) +{ + return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; +} + +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) +{ + pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; +} + +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) +{ + return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; +} + + /* Open coded p*d_addr_end that can deal with 64bit addresses */ #define kvm_pgd_addr_end(addr, end) \ ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index a31ecdad4b59..ae1d30a1aaae 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -130,6 +130,7 @@ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define L_PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[1] */ #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ /* diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1dc9778a00af..c9e5f384832e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -45,6 +45,7 @@ static phys_addr_t hyp_idmap_vector; #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) +#define kvm_pud_huge(_x) pud_huge(_x) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { @@ -905,6 +906,131 @@ static bool kvm_is_device_pfn(unsigned long pfn) return !pfn_valid(pfn); } +#ifdef CONFIG_ARM +/** + * stage2_wp_ptes - write protect PMD range + * @pmd: pointer to pmd entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + if (!kvm_s2pte_readonly(pte)) + kvm_set_s2pte_readonly(pte); + } + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +/** + * stage2_wp_pmds - write protect PUD range + * @pud: pointer to pud entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = pmd_offset(pud, addr); + + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + if (!kvm_s2pmd_readonly(pmd)) + kvm_set_s2pmd_readonly(pmd); + } else { + stage2_wp_ptes(pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); +} + +/** + * stage2_wp_puds - write protect PGD range + * @pgd: pointer to pgd entry + * @addr: range start address + * @end: range end address + * + * Process PUD entries, for a huge PUD we cause a panic. + */ +static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + /* TODO:PUD not supported, revisit later if supported */ + BUG_ON(kvm_pud_huge(*pud)); + stage2_wp_pmds(pud, addr, next); + } + } while (pud++, addr = next, addr != end); +} + +/** + * stage2_wp_range() - write protect stage2 memory region range + * @kvm: The KVM pointer + * @addr: Start address of range + * @end: End address of range + */ +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +{ + pgd_t *pgd; + phys_addr_t next; + + pgd = kvm->arch.pgd + pgd_index(addr); + do { + /* + * Release kvm_mmu_lock periodically if the memory region is + * large. Otherwise, we may see kernel panics with + * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCK_DETECTOR, + * CONFIG_LOCK_DEP. Additionally, holding the lock too long + * will also starve other vCPUs. + */ + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) + cond_resched_lock(&kvm->mmu_lock); + + next = kvm_pgd_addr_end(addr, end); + if (pgd_present(*pgd)) + stage2_wp_puds(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot + * @kvm: The KVM pointer + * @slot: The memory slot to write protect + * + * Called to start logging dirty pages after memory region + * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns + * all present PMD and PTEs are write protected in the memory region. + * Afterwards read of dirty page log can be called. + * + * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, + * serializing operations for VM memory regions. + */ +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) +{ + struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); + phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + stage2_wp_range(kvm, start, end); + spin_unlock(&kvm->mmu_lock); + kvm_flush_remote_tlbs(kvm); +} +#endif + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -1292,6 +1418,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, enum kvm_mr_change change) { +#ifdef CONFIG_ARM + /* + * At this point memslot has been committed and there is an + * allocated dirty_bitmap[], dirty pages will be be tracked while the + * memory slot is write protected. + */ + if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) + kvm_mmu_wp_memory_region(kvm, mem->slot); +#endif } int kvm_arch_prepare_memory_region(struct kvm *kvm, From 53c810c364d7a2d5cebd5a0e4c882643af3b0a0e Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:58:57 -0800 Subject: [PATCH 10/36] KVM: arm: dirty logging write protect support Add support to track dirty pages between user space KVM_GET_DIRTY_LOG ioctl calls. We call kvm_get_dirty_log_protect() function to do most of the work. Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Mario Smarduch --- arch/arm/kvm/Kconfig | 1 + arch/arm/kvm/arm.c | 34 ++++++++++++++++++++++++++++++++++ arch/arm/kvm/mmu.c | 22 ++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index f27f3360e52b..a8d1ace3ea51 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -24,6 +24,7 @@ config KVM select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST + select KVM_GENERIC_DIRTYLOG_READ_PROTECT depends on ARM_VIRT_EXT && ARM_LPAE ---help--- Support hosting virtualized guest machines. You will also diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2d6d91001062..1434410e7f46 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -787,9 +787,43 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } } +/** + * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot + * @kvm: kvm instance + * @log: slot id and address to which we copy the log + * + * Steps 1-4 below provide general overview of dirty page logging. See + * kvm_get_dirty_log_protect() function description for additional details. + * + * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we + * always flush the TLB (step 4) even if previous step failed and the dirty + * bitmap may be corrupt. Regardless of previous outcome the KVM logging API + * does not preclude user space subsequent dirty log read. Flushing TLB ensures + * writes will be marked dirty for next log read. + * + * 1. Take a snapshot of the bit and clear it if needed. + * 2. Write protect the corresponding page. + * 3. Copy the snapshot to the userspace. + * 4. Flush TLB's if needed. + */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { +#ifdef CONFIG_ARM + bool is_dirty = false; + int r; + + mutex_lock(&kvm->slots_lock); + + r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); + + if (is_dirty) + kvm_flush_remote_tlbs(kvm); + + mutex_unlock(&kvm->slots_lock); + return r; +#else /* arm64 */ return -EINVAL; +#endif } static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index c9e5f384832e..565e903f6f78 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1029,6 +1029,28 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } + +/** + * kvm_arch_mmu_write_protect_pt_masked() - write protect dirty pages + * @kvm: The KVM pointer + * @slot: The memory slot associated with mask + * @gfn_offset: The gfn offset in memory slot + * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory + * slot to be write protected + * + * Walks bits set in mask write protects the associated pte's. Caller must + * acquire kvm_mmu_lock. + */ +void kvm_arch_mmu_write_protect_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + phys_addr_t base_gfn = slot->base_gfn + gfn_offset; + phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; + phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; + + stage2_wp_range(kvm, start, end); +} #endif static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, From 15a49a44fc36209e1112e9b8451d653cd07f17a8 Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:58:58 -0800 Subject: [PATCH 11/36] KVM: arm: page logging 2nd stage fault handling This patch adds support for 2nd stage page fault handling while dirty page logging. On huge page faults, huge pages are dissolved to normal pages, and rebuilding of 2nd stage huge pages is blocked. In case migration is canceled this restriction is removed and huge pages may be rebuilt again. Signed-off-by: Mario Smarduch Reviewed-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 97 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 565e903f6f78..6685c68ef4ba 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -47,6 +47,18 @@ static phys_addr_t hyp_idmap_vector; #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) #define kvm_pud_huge(_x) pud_huge(_x) +#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) +#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) + +static bool memslot_is_logging(struct kvm_memory_slot *memslot) +{ +#ifdef CONFIG_ARM + return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); +#else + return false; +#endif +} + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { /* @@ -59,6 +71,25 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } +/** + * stage2_dissolve_pmd() - clear and flush huge PMD entry + * @kvm: pointer to kvm structure. + * @addr: IPA + * @pmd: pmd pointer for IPA + * + * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all + * pages in the range dirty. + */ +static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) +{ + if (!kvm_pmd_huge(*pmd)) + return; + + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pmd)); +} + static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min, int max) { @@ -768,10 +799,15 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache } static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, - phys_addr_t addr, const pte_t *new_pte, bool iomap) + phys_addr_t addr, const pte_t *new_pte, + unsigned long flags) { pmd_t *pmd; pte_t *pte, old_pte; + bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP; + bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE; + + VM_BUG_ON(logging_active && !cache); /* Create stage-2 page table mapping - Levels 0 and 1 */ pmd = stage2_get_pmd(kvm, cache, addr); @@ -783,6 +819,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; } + /* + * While dirty page logging - dissolve huge PMD, then continue on to + * allocate page. + */ + if (logging_active) + stage2_dissolve_pmd(kvm, addr, pmd); + /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { if (!cache) @@ -839,7 +882,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (ret) goto out; spin_lock(&kvm->mmu_lock); - ret = stage2_set_pte(kvm, &cache, addr, &pte, true); + ret = stage2_set_pte(kvm, &cache, addr, &pte, + KVM_S2PTE_FLAG_IS_IOMAP); spin_unlock(&kvm->mmu_lock); if (ret) goto out; @@ -1067,6 +1111,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pfn_t pfn; pgprot_t mem_type = PAGE_S2; bool fault_ipa_uncached; + bool logging_active = memslot_is_logging(memslot); + unsigned long flags = 0; write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { @@ -1083,7 +1129,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma)) { + if (is_vm_hugetlb_page(vma) && !logging_active) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { @@ -1124,12 +1170,30 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_pfn(pfn)) return -EFAULT; - if (kvm_is_device_pfn(pfn)) + if (kvm_is_device_pfn(pfn)) { mem_type = PAGE_S2_DEVICE; + flags |= KVM_S2PTE_FLAG_IS_IOMAP; + } else if (logging_active) { + /* + * Faults on pages in a memslot with logging enabled + * should not be mapped with huge pages (it introduces churn + * and performance degradation), so force a pte mapping. + */ + force_pte = true; + flags |= KVM_S2_FLAG_LOGGING_ACTIVE; + + /* + * Only actually map the page as writable if this was a write + * fault. + */ + if (!write_fault) + writable = false; + } spin_lock(&kvm->mmu_lock); if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; + if (!hugetlb && !force_pte) hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); @@ -1147,17 +1211,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); + if (writable) { kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); + mark_page_dirty(kvm, gfn); } coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, fault_ipa_uncached); - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, - pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); } - out_unlock: spin_unlock(&kvm->mmu_lock); kvm_release_pfn_clean(pfn); @@ -1307,7 +1371,14 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) { pte_t *pte = (pte_t *)data; - stage2_set_pte(kvm, NULL, gpa, pte, false); + /* + * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE + * flag clear because MMU notifiers will have unmapped a huge PMD before + * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and + * therefore stage2_set_pte() never needs to clear out a huge PMD + * through this calling path. + */ + stage2_set_pte(kvm, NULL, gpa, pte, 0); } @@ -1461,7 +1532,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, bool writable = !(mem->flags & KVM_MEM_READONLY); int ret = 0; - if (change != KVM_MR_CREATE && change != KVM_MR_MOVE) + if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && + change != KVM_MR_FLAGS_ONLY) return 0; /* @@ -1512,6 +1584,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + vm_start - vma->vm_start; + /* IO region dirty page logging not allowed */ + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) + return -EINVAL; + ret = kvm_phys_addr_ioremap(kvm, gpa, pa, vm_end - vm_start, writable); @@ -1521,6 +1597,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = vm_end; } while (hva < reg_end); + if (change == KVM_MR_FLAGS_ONLY) + return ret; + spin_lock(&kvm->mmu_lock); if (ret) unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); From 8199ed0e7c28ece79674a9fbba3208e93395a646 Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:58:59 -0800 Subject: [PATCH 12/36] KVM: arm64: ARMv8 header changes for page logging This patch adds arm64 helpers to write protect pmds/ptes and retrieve permissions while logging dirty pages. Also adds prototype to write protect a memory slot and adds a pmd define to check for read-only pmds. Reviewed-by: Christoffer Dall Signed-off-by: Mario Smarduch --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/kvm_mmu.h | 21 +++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 1 + 4 files changed, 24 insertions(+) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 483842180f8f..4f7310fa77f0 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -126,6 +126,7 @@ extern char __kvm_hyp_vector[]; extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0b7dfdb931df..012af6ce9eed 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -199,6 +199,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 14a74f136272..66577581ce68 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -118,6 +118,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= PMD_S2_RDWR; } +static inline void kvm_set_s2pte_readonly(pte_t *pte) +{ + pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY; +} + +static inline bool kvm_s2pte_readonly(pte_t *pte) +{ + return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY; +} + +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) +{ + pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY; +} + +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) +{ + return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; +} + + #define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 88174e0bfafe..5f930cc9ea83 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -119,6 +119,7 @@ #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */ #define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ /* From 9836c6b9fff8cbc43e8bb0098e901fe84ceb0810 Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:59:00 -0800 Subject: [PATCH 13/36] KVM: arm64: Add HYP interface to flush VM Stage 1/2 TLB entries This patch adds support for arm64 hyp interface to flush all TLBs associated with VMID. Reviewed-by: Christoffer Dall Signed-off-by: Mario Smarduch --- arch/arm64/kvm/hyp.S | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index fbe909fb0a1a..d9c43447eb04 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -1030,6 +1030,28 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) ret ENDPROC(__kvm_tlb_flush_vmid_ipa) +/** + * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs + * @struct kvm *kvm - pointer to kvm structure + * + * Invalidates all Stage 1 and 2 TLB entries for current VMID. + */ +ENTRY(__kvm_tlb_flush_vmid) + dsb ishst + + kern_hyp_va x0 + ldr x2, [x0, #KVM_VTTBR] + msr vttbr_el2, x2 + isb + + tlbi vmalls12e1is + dsb ish + isb + + msr vttbr_el2, xzr + ret +ENDPROC(__kvm_tlb_flush_vmid) + ENTRY(__kvm_flush_vm_context) dsb ishst tlbi alle1is From 7276030a082c9c33150e5900a80e26c6e3189b16 Mon Sep 17 00:00:00 2001 From: Mario Smarduch Date: Thu, 15 Jan 2015 15:59:01 -0800 Subject: [PATCH 14/36] KVM: arm/arm64: Enable Dirty Page logging for ARMv8 This patch enables ARMv8 ditry page logging support. Plugs ARMv8 into generic layer through Kconfig symbol, and drops earlier ARM64 constraints to enable logging at architecture layer. Reviewed-by: Christoffer Dall Signed-off-by: Mario Smarduch --- arch/arm/include/asm/kvm_host.h | 12 ------------ arch/arm/kvm/arm.c | 4 ---- arch/arm/kvm/mmu.c | 19 +++++++++++-------- arch/arm64/kvm/Kconfig | 2 ++ 4 files changed, 13 insertions(+), 24 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 38931d4f4ba3..7d07eb85c0b0 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -221,18 +221,6 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } -/** - * kvm_flush_remote_tlbs() - flush all VM TLB entries - * @kvm: pointer to kvm structure. - * - * Interface to HYP function to flush all VM TLB entries without address - * parameter. - */ -static inline void kvm_flush_remote_tlbs(struct kvm *kvm) -{ - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); -} - static inline int kvm_arch_dev_ioctl_check_extension(long ext) { return 0; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 1434410e7f46..74603a09ee76 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -808,7 +808,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { -#ifdef CONFIG_ARM bool is_dirty = false; int r; @@ -821,9 +820,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) mutex_unlock(&kvm->slots_lock); return r; -#else /* arm64 */ - return -EINVAL; -#endif } static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 6685c68ef4ba..eb945973b3be 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -52,11 +52,18 @@ static phys_addr_t hyp_idmap_vector; static bool memslot_is_logging(struct kvm_memory_slot *memslot) { -#ifdef CONFIG_ARM return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); -#else - return false; -#endif +} + +/** + * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8 + * @kvm: pointer to kvm structure. + * + * Interface to HYP function to flush all VM TLB entries + */ +void kvm_flush_remote_tlbs(struct kvm *kvm) +{ + kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); } static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) @@ -950,7 +957,6 @@ static bool kvm_is_device_pfn(unsigned long pfn) return !pfn_valid(pfn); } -#ifdef CONFIG_ARM /** * stage2_wp_ptes - write protect PMD range * @pmd: pointer to pmd entry @@ -1095,7 +1101,6 @@ void kvm_arch_mmu_write_protect_pt_masked(struct kvm *kvm, stage2_wp_range(kvm, start, end); } -#endif static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, @@ -1511,7 +1516,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, enum kvm_mr_change change) { -#ifdef CONFIG_ARM /* * At this point memslot has been committed and there is an * allocated dirty_bitmap[], dirty pages will be be tracked while the @@ -1519,7 +1523,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, */ if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) kvm_mmu_wp_memory_region(kvm, mem->slot); -#endif } int kvm_arch_prepare_memory_region(struct kvm *kvm, diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 8ba85e9ea388..3ce389b3c21c 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -22,10 +22,12 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST select KVM_ARM_VGIC select KVM_ARM_TIMER + select KVM_GENERIC_DIRTYLOG_READ_PROTECT ---help--- Support hosting virtualized guest machines. From 4429fc64b90368e9bc93f933ea8b011d8db3a2f2 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 2 Jun 2014 15:37:13 +0200 Subject: [PATCH 15/36] arm/arm64: KVM: rework MPIDR assignment and add accessors The virtual MPIDR registers (containing topology information) for the guest are currently mapped linearily to the vcpu_id. Improve this mapping for arm64 by using three levels to not artificially limit the number of vCPUs. To help this, change and rename the kvm_vcpu_get_mpidr() function to mask off the non-affinity bits in the MPIDR register. Also add an accessor to later allow easier access to a vCPU with a given MPIDR. Use this new accessor in the PSCI emulation. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++-- arch/arm/include/asm/kvm_host.h | 2 ++ arch/arm/kvm/arm.c | 13 +++++++++++++ arch/arm/kvm/psci.c | 17 +++++------------ arch/arm64/include/asm/kvm_emulate.h | 5 +++-- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/sys_regs.c | 13 +++++++++++-- 7 files changed, 39 insertions(+), 18 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 66ce17655bb9..c52861577567 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -23,6 +23,7 @@ #include #include #include +#include unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); @@ -167,9 +168,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; } -static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu->arch.cp15[c0_MPIDR]; + return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 7d07eb85c0b0..2fa51740edc2 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -236,6 +236,8 @@ int kvm_perf_teardown(void); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); +struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); + static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 74603a09ee76..a7b94ecba0d8 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1075,6 +1075,19 @@ static void check_kvm_target_cpu(void *ret) *(int *)ret = kvm_target_cpu(); } +struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) +{ + struct kvm_vcpu *vcpu; + int i; + + mpidr &= MPIDR_HWID_BITMASK; + kvm_for_each_vcpu(i, vcpu, kvm) { + if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) + return vcpu; + } + return NULL; +} + /** * Initialize Hyp-mode and memory mappings on all CPUs. */ diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 58cb3248d277..02fa8eff6ae1 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * This is an implementation of the Power State Coordination Interface @@ -66,25 +67,17 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; - struct kvm_vcpu *vcpu = NULL, *tmp; + struct kvm_vcpu *vcpu = NULL; wait_queue_head_t *wq; unsigned long cpu_id; unsigned long context_id; - unsigned long mpidr; phys_addr_t target_pc; - int i; - cpu_id = *vcpu_reg(source_vcpu, 1); + cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK; if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); - kvm_for_each_vcpu(i, tmp, kvm) { - mpidr = kvm_vcpu_get_mpidr(tmp); - if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { - vcpu = tmp; - break; - } - } + vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); /* * Make sure the caller requested a valid CPU and that the CPU is @@ -155,7 +148,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) * then ON else OFF */ kvm_for_each_vcpu(i, tmp, kvm) { - mpidr = kvm_vcpu_get_mpidr(tmp); + mpidr = kvm_vcpu_get_mpidr_aff(tmp); if (((mpidr & target_affinity_mask) == target_affinity) && !tmp->arch.pause) { return PSCI_0_2_AFFINITY_LEVEL_ON; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index a6fa2d2cd41c..b3f1defcb081 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -27,6 +27,7 @@ #include #include #include +#include unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); @@ -192,9 +193,9 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; } -static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) +static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu_sys_reg(vcpu, MPIDR_EL1); + return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 012af6ce9eed..ff8ee3ec32f4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -207,6 +207,8 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); + static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3d7c2df89946..136e6797676b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -252,10 +252,19 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { + u64 mpidr; + /* - * Simply map the vcpu_id into the Aff0 field of the MPIDR. + * Map the vcpu_id into the first three affinity level fields of + * the MPIDR. We limit the number of VCPUs in level 0 due to a + * limitation to 16 CPUs in that level in the ICC_SGIxR registers + * of the GICv3 to be able to address each CPU directly when + * sending IPIs. */ - vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); + mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); + mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); + mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2); + vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; } /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ From 59892136c40d6735e627dcea6a056380dfd6721d Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 3 Jun 2014 09:33:10 +0200 Subject: [PATCH 16/36] arm/arm64: KVM: pass down user space provided GIC type into vGIC code With the introduction of a second emulated GIC model we need to let userspace specify the GIC model to use for each VM. Pass the userspace provided value down into the vGIC code and store it there to differentiate later. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 +- include/kvm/arm_vgic.h | 7 +++++-- virt/kvm/arm/vgic.c | 15 +++++++++++++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a7b94ecba0d8..3a51ffca75e3 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -851,7 +851,7 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { if (vgic_present) - return kvm_vgic_create(kvm); + return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); else return -ENXIO; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ac4888dc86bc..525ce4228495 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,6 +140,9 @@ struct vgic_dist { bool in_kernel; bool ready; + /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */ + u32 vgic_model; + int nr_cpus; int nr_irqs; @@ -275,7 +278,7 @@ struct kvm_exit_mmio; int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_map_resources(struct kvm *kvm); -int kvm_vgic_create(struct kvm *kvm); +int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); @@ -327,7 +330,7 @@ static inline int kvm_vgic_map_resources(struct kvm *kvm) return 0; } -static inline int kvm_vgic_create(struct kvm *kvm) +static inline int kvm_vgic_create(struct kvm *kvm, u32 type) { return 0; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c84f53dfcd62..283038e98d53 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1698,6 +1698,16 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, int vcpu_id; if (unlikely(!vgic_initialized(kvm))) { + /* + * We only provide the automatic initialization of the VGIC + * for the legacy case of a GICv2. Any other type must + * be explicitly initialized once setup with the respective + * KVM device call. + */ + if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) { + ret = -EBUSY; + goto out; + } mutex_lock(&kvm->lock); ret = vgic_init(kvm); mutex_unlock(&kvm->lock); @@ -1935,7 +1945,7 @@ out: return ret; } -int kvm_vgic_create(struct kvm *kvm) +int kvm_vgic_create(struct kvm *kvm, u32 type) { int i, vcpu_lock_idx = -1, ret; struct kvm_vcpu *vcpu; @@ -1967,6 +1977,7 @@ int kvm_vgic_create(struct kvm *kvm) spin_lock_init(&kvm->arch.vgic.lock); kvm->arch.vgic.in_kernel = true; + kvm->arch.vgic.vgic_model = type; kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; @@ -2404,7 +2415,7 @@ static void vgic_destroy(struct kvm_device *dev) static int vgic_create(struct kvm_device *dev, u32 type) { - return kvm_vgic_create(dev->kvm); + return kvm_vgic_create(dev->kvm, type); } static struct kvm_device_ops kvm_arm_vgic_v2_ops = { From 96415257a1bdf5c84bdf4991cf65eaf2265812c9 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 2 Jun 2014 22:44:37 +0200 Subject: [PATCH 17/36] arm/arm64: KVM: refactor vgic_handle_mmio() function Currently we only need to deal with one MMIO region for the GIC emulation (the GICv2 distributor), but we soon need to extend this. Refactor the existing code to allow easier addition of different ranges without code duplication. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 73 ++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 283038e98d53..b5aa821b89a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1034,36 +1034,27 @@ static bool vgic_validate_access(const struct vgic_dist *dist, } /** - * vgic_handle_mmio - handle an in-kernel MMIO access + * vgic_handle_mmio_range - handle an in-kernel MMIO access * @vcpu: pointer to the vcpu performing the access * @run: pointer to the kvm_run structure * @mmio: pointer to the data describing the access + * @ranges: array of MMIO ranges in a given region + * @mmio_base: base address of that region * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. + * returns true if the MMIO access could be performed */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) +static bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio, + const struct mmio_range *ranges, + unsigned long mmio_base) { const struct mmio_range *range; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long base = dist->vgic_dist_base; bool updated_state; unsigned long offset; - if (!irqchip_in_kernel(vcpu->kvm) || - mmio->phys_addr < base || - (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* We don't support ldrd / strd or ldm / stm to the emulated vgic */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - offset = mmio->phys_addr - base; - range = find_matching_range(vgic_dist_ranges, mmio, offset); + offset = mmio->phys_addr - mmio_base; + range = find_matching_range(ranges, mmio, offset); if (unlikely(!range || !range->handle_mmio)) { pr_warn("Unhandled access %d %08llx %d\n", mmio->is_write, mmio->phys_addr, mmio->len); @@ -1071,7 +1062,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, } spin_lock(&vcpu->kvm->arch.vgic.lock); - offset = mmio->phys_addr - range->base - base; + offset -= range->base; if (vgic_validate_access(dist, range, offset)) { updated_state = range->handle_mmio(vcpu, mmio, offset); } else { @@ -1089,6 +1080,48 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } +static inline bool is_in_range(phys_addr_t addr, unsigned long len, + phys_addr_t baseaddr, unsigned long size) +{ + return (addr >= baseaddr) && (addr + len <= baseaddr + size); +} + +static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; + + if (!is_in_range(mmio->phys_addr, mmio->len, base, + KVM_VGIC_V2_DIST_SIZE)) + return false; + + /* GICv2 does not support accesses wider than 32 bits */ + if (mmio->len > 4) { + kvm_inject_dabt(vcpu, mmio->phys_addr); + return true; + } + + return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); +} + +/** + * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation + * @vcpu: pointer to the vcpu performing the access + * @run: pointer to the kvm_run structure + * @mmio: pointer to the data describing the access + * + * returns true if the MMIO access has been performed in kernel space, + * and false if it needs to be emulated in user space. + */ +bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + if (!irqchip_in_kernel(vcpu->kvm)) + return false; + + return vgic_v2_handle_mmio(vcpu, run, mmio); +} + static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) { return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; From 05bc8aafe664b3c7507736db305b3d3910ed2f05 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 5 Jun 2014 16:07:50 +0200 Subject: [PATCH 18/36] arm/arm64: KVM: wrap 64 bit MMIO accesses with two 32 bit ones Some GICv3 registers can and will be accessed as 64 bit registers. Currently the register handling code can only deal with 32 bit accesses, so we do two consecutive calls to cover this. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 53 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b5aa821b89a8..a1fda798148e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1033,6 +1033,53 @@ static bool vgic_validate_access(const struct vgic_dist *dist, return true; } +/* + * Call the respective handler function for the given range. + * We split up any 64 bit accesses into two consecutive 32 bit + * handler calls and merge the result afterwards. + * We do this in a little endian fashion regardless of the host's + * or guest's endianness, because the GIC is always LE and the rest of + * the code (vgic_reg_access) also puts it in a LE fashion already. + * At this point we have already identified the handle function, so + * range points to that one entry and offset is relative to this. + */ +static bool call_range_handler(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + unsigned long offset, + const struct mmio_range *range) +{ + u32 *data32 = (void *)mmio->data; + struct kvm_exit_mmio mmio32; + bool ret; + + if (likely(mmio->len <= 4)) + return range->handle_mmio(vcpu, mmio, offset); + + /* + * Any access bigger than 4 bytes (that we currently handle in KVM) + * is actually 8 bytes long, caused by a 64-bit access + */ + + mmio32.len = 4; + mmio32.is_write = mmio->is_write; + + mmio32.phys_addr = mmio->phys_addr + 4; + if (mmio->is_write) + *(u32 *)mmio32.data = data32[1]; + ret = range->handle_mmio(vcpu, &mmio32, offset + 4); + if (!mmio->is_write) + data32[1] = *(u32 *)mmio32.data; + + mmio32.phys_addr = mmio->phys_addr; + if (mmio->is_write) + *(u32 *)mmio32.data = data32[0]; + ret |= range->handle_mmio(vcpu, &mmio32, offset); + if (!mmio->is_write) + data32[0] = *(u32 *)mmio32.data; + + return ret; +} + /** * vgic_handle_mmio_range - handle an in-kernel MMIO access * @vcpu: pointer to the vcpu performing the access @@ -1064,10 +1111,10 @@ static bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, spin_lock(&vcpu->kvm->arch.vgic.lock); offset -= range->base; if (vgic_validate_access(dist, range, offset)) { - updated_state = range->handle_mmio(vcpu, mmio, offset); + updated_state = call_range_handler(vcpu, mmio, offset, range); } else { - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + if (!mmio->is_write) + memset(mmio->data, 0, mmio->len); updated_state = false; } spin_unlock(&vcpu->kvm->arch.vgic.lock); From b26e5fdac43c1b7c394502917e42e3b91f3aa1a3 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 2 Jun 2014 16:19:12 +0200 Subject: [PATCH 19/36] arm/arm64: KVM: introduce per-VM ops Currently we only have one virtual GIC model supported, so all guests use the same emulation code. With the addition of another model we end up with different guests using potentially different vGIC models, so we have to split up some functions to be per VM. Introduce a vgic_vm_ops struct to hold function pointers for those functions that are different and provide the necessary code to initialize them. Also split up the vgic_init() function to separate out VGIC model specific functionality into a separate function, which will later be different for a GICv3 model. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 11 ++++++ virt/kvm/arm/vgic.c | 86 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 87 insertions(+), 10 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 525ce4228495..dd243969bfc3 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -134,6 +134,15 @@ struct vgic_params { void __iomem *vctrl_base; }; +struct vgic_vm_ops { + bool (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *, + struct kvm_exit_mmio *); + bool (*queue_sgi)(struct kvm_vcpu *, int irq); + void (*add_sgi_source)(struct kvm_vcpu *, int irq, int source); + int (*init_model)(struct kvm *); + int (*map_resources)(struct kvm *, const struct vgic_params *); +}; + struct vgic_dist { #ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; @@ -215,6 +224,8 @@ struct vgic_dist { /* Bitmap indicating which CPU has something pending */ unsigned long *irq_pending_on_cpu; + + struct vgic_vm_ops vm_ops; #endif }; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a1fda798148e..9b63141a599d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -106,6 +106,21 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; +static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) +{ + vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source); +} + +static bool queue_sgi(struct kvm_vcpu *vcpu, int irq) +{ + return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq); +} + +int kvm_vgic_map_resources(struct kvm *kvm) +{ + return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic); +} + /* * struct vgic_bitmap contains a bitmap made of unsigned longs, but * extracts u32s out of them. @@ -762,6 +777,13 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } +static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source; +} + /** * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs @@ -776,9 +798,7 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, */ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int vcpu_id = vcpu->vcpu_id; int i; for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { @@ -805,7 +825,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) */ vgic_dist_irq_set_pending(vcpu, lr.irq); if (lr.irq < VGIC_NR_SGIS) - *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; + add_sgi_source(vcpu, lr.irq, lr.source); lr.state &= ~LR_STATE_PENDING; vgic_set_lr(vcpu, i, lr); @@ -1159,6 +1179,7 @@ static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, * * returns true if the MMIO access has been performed in kernel space, * and false if it needs to be emulated in user space. + * Calls the actual handling routine for the selected VGIC model. */ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio) @@ -1166,7 +1187,12 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, if (!irqchip_in_kernel(vcpu->kvm)) return false; - return vgic_v2_handle_mmio(vcpu, run, mmio); + /* + * This will currently call either vgic_v2_handle_mmio() or + * vgic_v3_handle_mmio(), which in turn will call + * vgic_handle_mmio_range() defined above. + */ + return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); } static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) @@ -1418,7 +1444,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return true; } -static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) +static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; unsigned long sources; @@ -1493,7 +1519,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) /* SGIs */ for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { - if (!vgic_queue_sgi(vcpu, i)) + if (!queue_sgi(vcpu, i)) overflow = 1; } @@ -1883,6 +1909,16 @@ void kvm_vgic_destroy(struct kvm *kvm) dist->nr_cpus = 0; } +static int vgic_v2_init_model(struct kvm *kvm) +{ + int i; + + for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) + vgic_set_target_reg(kvm, 0, i); + + return 0; +} + /* * Allocate and initialize the various data structures. Must be called * with kvm->lock held! @@ -1942,8 +1978,9 @@ static int vgic_init(struct kvm *kvm) if (ret) goto out; - for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); + ret = kvm->arch.vgic.vm_ops.init_model(kvm); + if (ret) + goto out; kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { ret = vgic_vcpu_init_maps(vcpu, nr_irqs); @@ -1980,7 +2017,8 @@ out: * can't do this at creation time, because user space must first set the * virtual CPU interface address in the guest physical address space. */ -int kvm_vgic_map_resources(struct kvm *kvm) +static int vgic_v2_map_resources(struct kvm *kvm, + const struct vgic_params *params) { int ret = 0; @@ -2010,7 +2048,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE, + params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); @@ -2025,6 +2063,30 @@ out: return ret; } +static void vgic_v2_init_emulation(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; + dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; + dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; + dist->vm_ops.init_model = vgic_v2_init_model; + dist->vm_ops.map_resources = vgic_v2_map_resources; +} + +static int init_vgic_model(struct kvm *kvm, int type) +{ + switch (type) { + case KVM_DEV_TYPE_ARM_VGIC_V2: + vgic_v2_init_emulation(kvm); + break; + default: + return -ENODEV; + } + + return 0; +} + int kvm_vgic_create(struct kvm *kvm, u32 type) { int i, vcpu_lock_idx = -1, ret; @@ -2055,6 +2117,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) } ret = 0; + ret = init_vgic_model(kvm, type); + if (ret) + goto out_unlock; + spin_lock_init(&kvm->arch.vgic.lock); kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; From ea2f83a7de9d0abbd145e37177905aab57fdb835 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sun, 26 Oct 2014 23:17:00 +0000 Subject: [PATCH 20/36] arm/arm64: KVM: move kvm_register_device_ops() into vGIC probing Currently we unconditionally register the GICv2 emulation device during the host's KVM initialization. Since with GICv3 support we may end up with only v2 or only v3 or both supported, we move the registration into the GIC probing function, where we will later know which combination is valid. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 1 + virt/kvm/arm/vgic-v2.c | 2 ++ virt/kvm/arm/vgic-v3.c | 1 + virt/kvm/arm/vgic.c | 5 ++--- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b934cc94cc8..25d7ce31a5d4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1051,6 +1051,7 @@ void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_arm_vgic_v2_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 2935405ad22f..e1cd3cb95903 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -229,6 +229,8 @@ int vgic_v2_probe(struct device_node *vgic_node, goto out_unmap; } + kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); + vgic->vcpu_base = vcpu_res.start; kvm_info("%s@%llx IRQ%d\n", vgic_node->name, diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 1c2c8eef0599..d14c75f4a33b 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -230,6 +230,7 @@ int vgic_v3_probe(struct device_node *vgic_node, ret = -ENXIO; goto out; } + kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); vgic->vcpu_base = vcpu_res.start; vgic->vctrl_base = NULL; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 9b63141a599d..69f6e7aa573e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2564,7 +2564,7 @@ static int vgic_create(struct kvm_device *dev, u32 type) return kvm_vgic_create(dev->kvm, type); } -static struct kvm_device_ops kvm_arm_vgic_v2_ops = { +struct kvm_device_ops kvm_arm_vgic_v2_ops = { .name = "kvm-arm-vgic", .create = vgic_create, .destroy = vgic_destroy, @@ -2643,8 +2643,7 @@ int kvm_vgic_hyp_init(void) on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - return kvm_register_device_ops(&kvm_arm_vgic_v2_ops, - KVM_DEV_TYPE_ARM_VGIC_V2); + return 0; out_free_irq: free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); From 4ce7ebdfc69d1d5d166eec103ed2976eb45a6173 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sun, 26 Oct 2014 23:18:14 +0000 Subject: [PATCH 21/36] arm/arm64: KVM: dont rely on a valid GICH base address To check whether the vGIC was already initialized, we currently check the GICH base address for not being NULL. Since with GICv3 we may get along without this address, lets use the irqchip_in_kernel() function to detect an already initialized vGIC. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 69f6e7aa573e..1c3b75eb28f0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2094,7 +2094,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) mutex_lock(&kvm->lock); - if (kvm->arch.vgic.vctrl_base) { + if (irqchip_in_kernel(kvm)) { ret = -EEXIST; goto out; } From 3caa2d8c3b2d80f5e342fe8cec07c03c8147dcab Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 2 Jun 2014 16:26:01 +0200 Subject: [PATCH 22/36] arm/arm64: KVM: make the maximum number of vCPUs a per-VM value Currently the maximum number of vCPUs supported is a global value limited by the used GIC model. GICv3 will lift this limit, but we still need to observe it for guests using GICv2. So the maximum number of vCPUs is per-VM value, depending on the GIC model the guest uses. Store and check the value in struct kvm_arch, but keep it down to 8 for now. Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 1 + arch/arm/kvm/arm.c | 8 ++++++++ arch/arm64/include/asm/kvm_host.h | 3 +++ include/kvm/arm_vgic.h | 8 ++++++++ virt/kvm/arm/vgic-v2.c | 1 + virt/kvm/arm/vgic-v3.c | 1 + virt/kvm/arm/vgic.c | 16 ++++++++++++++++ 7 files changed, 38 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 2fa51740edc2..bde494654bcc 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -68,6 +68,7 @@ struct kvm_arch { /* Interrupt controller */ struct vgic_dist vgic; + int max_vcpus; }; #define KVM_NR_MEM_OBJS 40 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3a51ffca75e3..6fbfa5fff05d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -132,6 +132,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* Mark the initial VMID generation invalid */ kvm->arch.vmid_gen = 0; + /* The maximum number of VCPUs is limited by the host's GIC model */ + kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus(); + return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(kvm); @@ -218,6 +221,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) goto out; } + if (id >= kvm->arch.max_vcpus) { + err = -EINVAL; + goto out; + } + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) { err = -ENOMEM; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ff8ee3ec32f4..2c49aa4ac818 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -59,6 +59,9 @@ struct kvm_arch { /* VTTBR value associated with above pgd and vmid */ u64 vttbr; + /* The maximum number of vCPUs depends on the used GIC model */ + int max_vcpus; + /* Interrupt controller */ struct vgic_dist vgic; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index dd243969bfc3..1c0e9dbabe6d 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -33,6 +33,7 @@ #define VGIC_V2_MAX_LRS (1 << 6) #define VGIC_V3_MAX_LRS 16 #define VGIC_MAX_IRQS 1024 +#define VGIC_V2_MAX_CPUS 8 /* Sanity checks... */ #if (KVM_MAX_VCPUS > 8) @@ -132,6 +133,7 @@ struct vgic_params { unsigned int maint_irq; /* Virtual control interface base address */ void __iomem *vctrl_base; + int max_gic_vcpus; }; struct vgic_vm_ops { @@ -289,6 +291,7 @@ struct kvm_exit_mmio; int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_map_resources(struct kvm *kvm); +int kvm_vgic_get_max_vcpus(void); int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -393,6 +396,11 @@ static inline bool vgic_ready(struct kvm *kvm) { return true; } + +static inline int kvm_vgic_get_max_vcpus(void) +{ + return KVM_MAX_VCPUS; +} #endif #endif diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index e1cd3cb95903..e8b82b289844 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -237,6 +237,7 @@ int vgic_v2_probe(struct device_node *vgic_node, vctrl_res.start, vgic->maint_irq); vgic->type = VGIC_V2; + vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; *ops = &vgic_v2_ops; *params = vgic; goto out; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index d14c75f4a33b..ea39bad4b004 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -235,6 +235,7 @@ int vgic_v3_probe(struct device_node *vgic_node, vgic->vcpu_base = vcpu_res.start; vgic->vctrl_base = NULL; vgic->type = VGIC_V3; + vgic->max_gic_vcpus = KVM_MAX_VCPUS; kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vcpu_res.start, vgic->maint_irq); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1c3b75eb28f0..2126bf5b0035 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1878,6 +1878,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) return 0; } +/** + * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW + * + * The host's GIC naturally limits the maximum amount of VCPUs a guest + * can use. + */ +int kvm_vgic_get_max_vcpus(void) +{ + return vgic->max_gic_vcpus; +} + void kvm_vgic_destroy(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -2072,6 +2083,8 @@ static void vgic_v2_init_emulation(struct kvm *kvm) dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; dist->vm_ops.init_model = vgic_v2_init_model; dist->vm_ops.map_resources = vgic_v2_map_resources; + + kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; } static int init_vgic_model(struct kvm *kvm, int type) @@ -2084,6 +2097,9 @@ static int init_vgic_model(struct kvm *kvm, int type) return -ENODEV; } + if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) + return -E2BIG; + return 0; } From 2f5fa41a7a7f47f3109a6596b0ec96258dbf06e6 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 3 Jun 2014 08:58:15 +0200 Subject: [PATCH 23/36] arm/arm64: KVM: make the value of ICC_SRE_EL1 a per-VM variable ICC_SRE_EL1 is a system register allowing msr/mrs accesses to the GIC CPU interface for EL1 (guests). Currently we force it to 0, but for proper GICv3 support we have to allow guests to use it (depending on their selected virtual GIC model). So add ICC_SRE_EL1 to the list of saved/restored registers on a world switch, but actually disallow a guest to change it by only restoring a fixed, once-initialized value. This value depends on the GIC model userland has chosen for a guest. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/vgic-v3-switch.S | 14 +++++++++----- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic-v3.c | 8 ++++++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9a9fce090d58..9d34486985fd 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -140,6 +140,7 @@ int main(void) DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); + DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre)); DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S index d16046999e06..617a012a0107 100644 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ b/arch/arm64/kvm/vgic-v3-switch.S @@ -148,17 +148,18 @@ * x0: Register pointing to VCPU struct */ .macro restore_vgic_v3_state - // Disable SRE_EL1 access. Necessary, otherwise - // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens... - msr_s ICC_SRE_EL1, xzr - isb - // Compute the address of struct vgic_cpu add x3, x0, #VCPU_VGIC_CPU // Restore all interesting registers ldr w4, [x3, #VGIC_V3_CPU_HCR] ldr w5, [x3, #VGIC_V3_CPU_VMCR] + ldr w25, [x3, #VGIC_V3_CPU_SRE] + + msr_s ICC_SRE_EL1, x25 + + // make sure SRE is valid before writing the other registers + isb msr_s ICH_HCR_EL2, x4 msr_s ICH_VMCR_EL2, x5 @@ -244,9 +245,12 @@ dsb sy // Prevent the guest from touching the GIC system registers + // if SRE isn't enabled for GICv3 emulation + cbnz x25, 1f mrs_s x5, ICC_SRE_EL2 and x5, x5, #~ICC_SRE_EL2_ENABLE msr_s ICC_SRE_EL2, x5 +1: .endm ENTRY(__save_vgic_v3_state) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 1c0e9dbabe6d..ff04afd0d901 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -245,6 +245,7 @@ struct vgic_v3_cpu_if { #ifdef CONFIG_ARM_GIC_V3 u32 vgic_hcr; u32 vgic_vmcr; + u32 vgic_sre; /* Restored only, change ignored */ u32 vgic_misr; /* Saved only */ u32 vgic_eisr; /* Saved only */ u32 vgic_elrsr; /* Saved only */ diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index ea39bad4b004..52490480b6f9 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -145,15 +145,19 @@ static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) static void vgic_v3_enable(struct kvm_vcpu *vcpu) { + struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + /* * By forcing VMCR to zero, the GIC will restore the binary * points to their reset values. Anything else resets to zero * anyway. */ - vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0; + vgic_v3->vgic_vmcr = 0; + + vgic_v3->vgic_sre = 0; /* Get the show on the road... */ - vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN; + vgic_v3->vgic_hcr = ICH_HCR_EN; } static const struct vgic_ops vgic_v3_ops = { From d97f683d0f4b2e63e68869f81ba2ce4ccbb6e5d8 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 11 Jun 2014 14:11:49 +0200 Subject: [PATCH 24/36] arm/arm64: KVM: refactor MMIO accessors The MMIO accessors for GICD_I[CS]ENABLER, GICD_I[CS]PENDR and GICD_ICFGR behave very similar for GICv2 and GICv3, although the way the affected VCPU is determined differs. Since we need them to access the registers from three different places in the future, we factor out a generic, backend-facing implementation and use small wrappers in the current GICv2 emulation. This will ease adding GICv3 accessors later. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 126 ++++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 52 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 2126bf5b0035..7589e2c82db2 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -492,64 +492,66 @@ static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, return false; } -static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) +static bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id, int access) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + u32 *reg; + int mode = ACCESS_READ_VALUE | access; + struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id); + + reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, mode); if (mmio->is_write) { - vgic_update_state(vcpu->kvm); + if (access & ACCESS_WRITE_CLEARBIT) { + if (offset < 4) /* Force SGI enabled */ + *reg |= 0xffff; + vgic_retire_disabled_irqs(target_vcpu); + } + vgic_update_state(kvm); return true; } return false; } +static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id, ACCESS_WRITE_SETBIT); +} + static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - if (mmio->is_write) { - if (offset < 4) /* Force SGI enabled */ - *reg |= 0xffff; - vgic_retire_disabled_irqs(vcpu); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; + return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT); } -static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, +static bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset) + phys_addr_t offset, int vcpu_id) { u32 *reg, orig; u32 level_mask; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT; + struct vgic_dist *dist = &kvm->arch.vgic; - reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); + reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset); level_mask = (~(*reg)); /* Mark both level and edge triggered irqs as pending */ - reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); orig = *reg; - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + vgic_reg_access(mmio, reg, offset, mode); if (mmio->is_write) { /* Set the soft-pending flag only for level-triggered irqs */ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, mode); *reg &= level_mask; /* Ignore writes to SGIs */ @@ -558,31 +560,30 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, *reg |= orig & 0xffff; } - vgic_update_state(vcpu->kvm); + vgic_update_state(kvm); return true; } return false; } -static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, +static bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset) + phys_addr_t offset, int vcpu_id) { u32 *level_active; u32 *reg, orig; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT; + struct vgic_dist *dist = &kvm->arch.vgic; - reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); orig = *reg; - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + vgic_reg_access(mmio, reg, offset, mode); if (mmio->is_write) { /* Re-set level triggered level-active interrupts */ level_active = vgic_bitmap_get_reg(&dist->irq_level, - vcpu->vcpu_id, offset); - reg = vgic_bitmap_get_reg(&dist->irq_pending, - vcpu->vcpu_id, offset); + vcpu_id, offset); + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); *reg |= *level_active; /* Ignore writes to SGIs */ @@ -593,17 +594,31 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, /* Clear soft-pending flags */ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, mode); - vgic_update_state(vcpu->kvm); + vgic_update_state(kvm); return true; } - return false; } +static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + +static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -726,14 +741,10 @@ static u16 vgic_cfg_compress(u32 val) * LSB is always 0. As such, we only keep the upper bit, and use the * two above functions to compress/expand the bits */ -static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) +static bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, + phys_addr_t offset) { u32 val; - u32 *reg; - - reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); if (offset & 4) val = *reg >> 16; @@ -762,6 +773,17 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, return false; } +static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 *reg; + + reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + vcpu->vcpu_id, offset >> 1); + + return vgic_handle_cfg_reg(reg, mmio, offset); +} + static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { From b60da146c135ea6b6c25a0ae925edca038b64344 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 21 Aug 2014 11:08:27 +0100 Subject: [PATCH 25/36] arm/arm64: KVM: refactor/wrap vgic_set/get_attr() vgic_set_attr() and vgic_get_attr() contain both code specific for the emulated GIC as well as code for the userland facing, generic part of the GIC. Split the guest GIC facing code of from the generic part to allow easier splitting later. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 80 +++++++++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7589e2c82db2..06073fac03a9 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2440,7 +2440,8 @@ out: return ret; } -static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +static int vgic_set_common_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) { int r; @@ -2456,17 +2457,6 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = kvm_vgic_addr(dev->kvm, type, &addr, true); return (r == -ENODEV) ? -ENXIO : r; } - - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_attr_regs_access(dev, attr, ®, true); - } case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { u32 __user *uaddr = (u32 __user *)(long)attr->addr; u32 val; @@ -2510,7 +2500,33 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return -ENXIO; } -static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_set_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_attr_regs_access(dev, attr, ®, true); + } + + } + + return -ENXIO; +} + +static int vgic_get_common_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) { int r = -ENXIO; @@ -2528,20 +2544,9 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return -EFAULT; break; } - - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg = 0; - - r = vgic_attr_regs_access(dev, attr, ®, false); - if (r) - return r; - r = put_user(reg, uaddr); - break; - } case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { u32 __user *uaddr = (u32 __user *)(long)attr->addr; + r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); break; } @@ -2551,6 +2556,31 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return r; } +static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_get_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg = 0; + + ret = vgic_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + + } + + return -ENXIO; +} + static int vgic_has_attr_regs(const struct mmio_range *ranges, phys_addr_t offset) { From 832158125d2ef30b364f21e1616495c40c286a4a Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 7 Jun 2014 00:53:08 +0200 Subject: [PATCH 26/36] arm/arm64: KVM: add vgic.h header file vgic.c is currently a mixture of generic vGIC emulation code and functions specific to emulating a GICv2. To ease the addition of GICv3 later, we create new header file vgic.h, which holds constants and prototypes of commonly used functions. Rename some identifiers to avoid name space clutter. I removed the long-standing comment about using the kvm_io_bus API to tackle the GIC register ranges, as it wouldn't be a win for us anymore. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall ------- As the diff isn't always obvious here (and to aid eventual rebases), here is a list of high-level changes done to the code: * moved definitions and prototypes from vgic.c to vgic.h: - VGIC_ADDR_UNDEF - ACCESS_{READ,WRITE}_* - vgic_init() - vgic_update_state() - vgic_kick_vcpus() - vgic_get_vmcr() - vgic_set_vmcr() - struct mmio_range {} (renamed to struct kvm_mmio_range) * removed static keyword and exported prototype in vgic.h: - vgic_bitmap_get_reg() - vgic_bitmap_set_irq_val() - vgic_bitmap_get_shared_map() - vgic_bytemap_get_reg() - vgic_dist_irq_set_pending() - vgic_dist_irq_clear_pending() - vgic_cpu_irq_clear() - vgic_reg_access() - handle_mmio_raz_wi() - vgic_handle_enable_reg() - vgic_handle_set_pending_reg() - vgic_handle_clear_pending_reg() - vgic_handle_cfg_reg() - vgic_unqueue_irqs() - find_matching_range() (renamed to vgic_find_range) - vgic_handle_mmio_range() - vgic_update_state() - vgic_get_vmcr() - vgic_set_vmcr() - vgic_queue_irq() - vgic_kick_vcpus() - vgic_init() - vgic_v2_init_emulation() - vgic_has_attr_regs() - vgic_set_common_attr() - vgic_get_common_attr() - vgic_destroy() - vgic_create() * moved functions to vgic.h (static inline): - mmio_data_read() - mmio_data_write() - is_in_range() Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 151 +++++++++++++++----------------------------- virt/kvm/arm/vgic.h | 120 +++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+), 101 deletions(-) create mode 100644 virt/kvm/arm/vgic.h diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 06073fac03a9..ce6c998a3f8d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -75,33 +75,16 @@ * inactive as long as the external input line is held high. */ -#define VGIC_ADDR_UNDEF (-1) -#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) +#include "vgic.h" -#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ -#define IMPLEMENTER_ARM 0x43b #define GICC_ARCH_VERSION_V2 0x2 -#define ACCESS_READ_VALUE (1 << 0) -#define ACCESS_READ_RAZ (0 << 0) -#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) -#define ACCESS_WRITE_IGNORED (0 << 1) -#define ACCESS_WRITE_SETBIT (1 << 1) -#define ACCESS_WRITE_CLEARBIT (2 << 1) -#define ACCESS_WRITE_VALUE (3 << 1) -#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) - -static int vgic_init(struct kvm *kvm); static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); -static void vgic_update_state(struct kvm *kvm); -static void vgic_kick_vcpus(struct kvm *kvm); static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); -static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; @@ -175,8 +158,7 @@ static unsigned long *u64_to_bitmask(u64 *val) return (unsigned long *)val; } -static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, - int cpuid, u32 offset) +u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { offset >>= 2; if (!offset) @@ -194,8 +176,8 @@ static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); } -static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, - int irq, int val) +void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, + int irq, int val) { unsigned long *reg; @@ -217,7 +199,7 @@ static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) return x->private + cpuid; } -static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) +unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) { return x->shared; } @@ -244,7 +226,7 @@ static void vgic_free_bytemap(struct vgic_bytemap *b) b->shared = NULL; } -static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) +u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) { u32 *reg; @@ -341,14 +323,14 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); } -static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) +void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); } -static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) +void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -364,7 +346,7 @@ static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) vcpu->arch.vgic_cpu.pending_shared); } -static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) +void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) { if (irq < VGIC_NR_PRIVATE_IRQS) clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); @@ -378,16 +360,6 @@ static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); } -static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) -{ - return le32_to_cpu(*((u32 *)mmio->data)) & mask; -} - -static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) -{ - *((u32 *)mmio->data) = cpu_to_le32(value) & mask; -} - /** * vgic_reg_access - access vgic register * @mmio: pointer to the data describing the mmio access @@ -399,8 +371,8 @@ static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) * modes defined for vgic register access * (read,raz,write-ignored,setbit,clearbit,write) */ -static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, - phys_addr_t offset, int mode) +void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, + phys_addr_t offset, int mode) { int word_offset = (offset & 3) * 8; u32 mask = (1UL << (mmio->len * 8)) - 1; @@ -484,16 +456,16 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, return false; } -static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) +bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, + phys_addr_t offset) { vgic_reg_access(mmio, NULL, offset, ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); return false; } -static bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id, int access) +bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id, int access) { u32 *reg; int mode = ACCESS_READ_VALUE | access; @@ -530,9 +502,9 @@ static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT); } -static bool vgic_handle_set_pending_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) +bool vgic_handle_set_pending_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) { u32 *reg, orig; u32 level_mask; @@ -567,9 +539,9 @@ static bool vgic_handle_set_pending_reg(struct kvm *kvm, return false; } -static bool vgic_handle_clear_pending_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) +bool vgic_handle_clear_pending_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) { u32 *level_active; u32 *reg, orig; @@ -741,8 +713,8 @@ static u16 vgic_cfg_compress(u32 val) * LSB is always 0. As such, we only keep the upper bit, and use the * two above functions to compress/expand the bits */ -static bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, - phys_addr_t offset) +bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, + phys_addr_t offset) { u32 val; @@ -818,7 +790,7 @@ static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) * to the distributor but the active state stays in the LRs, because we don't * track the active state on the distributor side. */ -static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) +void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int i; @@ -943,21 +915,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } -/* - * I would have liked to use the kvm_bus_io_*() API instead, but it - * cannot cope with banked registers (only the VM pointer is passed - * around, and we need the vcpu). One of these days, someone please - * fix it! - */ -struct mmio_range { - phys_addr_t base; - unsigned long len; - int bits_per_irq; - bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset); -}; - -static const struct mmio_range vgic_dist_ranges[] = { +static const struct kvm_mmio_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -1042,12 +1000,12 @@ static const struct mmio_range vgic_dist_ranges[] = { {} }; -static const -struct mmio_range *find_matching_range(const struct mmio_range *ranges, +const +struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - const struct mmio_range *r = ranges; + const struct kvm_mmio_range *r = ranges; while (r->len) { if (offset >= r->base && @@ -1060,7 +1018,7 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges, } static bool vgic_validate_access(const struct vgic_dist *dist, - const struct mmio_range *range, + const struct kvm_mmio_range *range, unsigned long offset) { int irq; @@ -1088,7 +1046,7 @@ static bool vgic_validate_access(const struct vgic_dist *dist, static bool call_range_handler(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, unsigned long offset, - const struct mmio_range *range) + const struct kvm_mmio_range *range) { u32 *data32 = (void *)mmio->data; struct kvm_exit_mmio mmio32; @@ -1132,18 +1090,18 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, * * returns true if the MMIO access could be performed */ -static bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, +bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio, - const struct mmio_range *ranges, + const struct kvm_mmio_range *ranges, unsigned long mmio_base) { - const struct mmio_range *range; + const struct kvm_mmio_range *range; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; bool updated_state; unsigned long offset; offset = mmio->phys_addr - mmio_base; - range = find_matching_range(ranges, mmio, offset); + range = vgic_find_range(ranges, mmio, offset); if (unlikely(!range || !range->handle_mmio)) { pr_warn("Unhandled access %d %08llx %d\n", mmio->is_write, mmio->phys_addr, mmio->len); @@ -1169,12 +1127,6 @@ static bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } -static inline bool is_in_range(phys_addr_t addr, unsigned long len, - phys_addr_t baseaddr, unsigned long size) -{ - return (addr >= baseaddr) && (addr + len <= baseaddr + size); -} - static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio) { @@ -1301,7 +1253,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) * Update the interrupt state and determine which CPUs have pending * interrupts. Must be called with distributor lock held. */ -static void vgic_update_state(struct kvm *kvm) +void vgic_update_state(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; @@ -1362,12 +1314,12 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) vgic_ops->disable_underflow(vcpu); } -static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { vgic_ops->get_vmcr(vcpu, vmcr); } -static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { vgic_ops->set_vmcr(vcpu, vmcr); } @@ -1417,7 +1369,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) * Queue an interrupt to a CPU virtual interface. Return true on success, * or false if it wasn't possible to queue it. */ -static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) +bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -1703,7 +1655,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } -static void vgic_kick_vcpus(struct kvm *kvm) +void vgic_kick_vcpus(struct kvm *kvm) { struct kvm_vcpu *vcpu; int c; @@ -1956,7 +1908,7 @@ static int vgic_v2_init_model(struct kvm *kvm) * Allocate and initialize the various data structures. Must be called * with kvm->lock held! */ -static int vgic_init(struct kvm *kvm) +int vgic_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; @@ -2096,7 +2048,7 @@ out: return ret; } -static void vgic_v2_init_emulation(struct kvm *kvm) +void vgic_v2_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -2326,7 +2278,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, * CPU Interface Register accesses - these are not accessed by the VM, but by * user space for saving and restoring VGIC state. */ -static const struct mmio_range vgic_cpu_ranges[] = { +static const struct kvm_mmio_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, .len = 12, @@ -2353,7 +2305,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, u32 *reg, bool is_write) { - const struct mmio_range *r = NULL, *ranges; + const struct kvm_mmio_range *r = NULL, *ranges; phys_addr_t offset; int ret, cpuid, c; struct kvm_vcpu *vcpu, *tmp_vcpu; @@ -2394,7 +2346,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, default: BUG(); } - r = find_matching_range(ranges, &mmio, offset); + r = vgic_find_range(ranges, &mmio, offset); if (unlikely(!r || !r->handle_mmio)) { ret = -ENXIO; @@ -2440,8 +2392,7 @@ out: return ret; } -static int vgic_set_common_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) +int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r; @@ -2525,8 +2476,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return -ENXIO; } -static int vgic_get_common_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) +int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = -ENXIO; @@ -2581,13 +2531,12 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return -ENXIO; } -static int vgic_has_attr_regs(const struct mmio_range *ranges, - phys_addr_t offset) +int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset) { struct kvm_exit_mmio dev_attr_mmio; dev_attr_mmio.len = 4; - if (find_matching_range(ranges, &dev_attr_mmio, offset)) + if (vgic_find_range(ranges, &dev_attr_mmio, offset)) return 0; else return -ENXIO; @@ -2622,12 +2571,12 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return -ENXIO; } -static void vgic_destroy(struct kvm_device *dev) +void vgic_destroy(struct kvm_device *dev) { kfree(dev); } -static int vgic_create(struct kvm_device *dev, u32 type) +int vgic_create(struct kvm_device *dev, u32 type) { return kvm_vgic_create(dev->kvm, type); } diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h new file mode 100644 index 000000000000..e363b9341873 --- /dev/null +++ b/virt/kvm/arm/vgic.h @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2012-2014 ARM Ltd. + * Author: Marc Zyngier + * + * Derived from virt/kvm/arm/vgic.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __KVM_VGIC_H__ +#define __KVM_VGIC_H__ + +#define VGIC_ADDR_UNDEF (-1) +#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) + +#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ +#define IMPLEMENTER_ARM 0x43b + +#define ACCESS_READ_VALUE (1 << 0) +#define ACCESS_READ_RAZ (0 << 0) +#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) +#define ACCESS_WRITE_IGNORED (0 << 1) +#define ACCESS_WRITE_SETBIT (1 << 1) +#define ACCESS_WRITE_CLEARBIT (2 << 1) +#define ACCESS_WRITE_VALUE (3 << 1) +#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) + +unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x); + +void vgic_update_state(struct kvm *kvm); +int vgic_init_common_maps(struct kvm *kvm); + +u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset); +u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset); + +void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq); +void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq); +void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq); +void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, + int irq, int val); + +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); + +bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq); +void vgic_unqueue_irqs(struct kvm_vcpu *vcpu); + +void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, + phys_addr_t offset, int mode); +bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, + phys_addr_t offset); + +static inline +u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) +{ + return le32_to_cpu(*((u32 *)mmio->data)) & mask; +} + +static inline +void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) +{ + *((u32 *)mmio->data) = cpu_to_le32(value) & mask; +} + +struct kvm_mmio_range { + phys_addr_t base; + unsigned long len; + int bits_per_irq; + bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, + phys_addr_t offset); +}; + +static inline bool is_in_range(phys_addr_t addr, unsigned long len, + phys_addr_t baseaddr, unsigned long size) +{ + return (addr >= baseaddr) && (addr + len <= baseaddr + size); +} + +const +struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, + struct kvm_exit_mmio *mmio, + phys_addr_t offset); + +bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio, + const struct kvm_mmio_range *ranges, + unsigned long mmio_base); + +bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id, int access); + +bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + +bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + +bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, + phys_addr_t offset); + +void vgic_kick_vcpus(struct kvm *kvm); + +int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset); +int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); +int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); + +int vgic_init(struct kvm *kvm); +void vgic_v2_init_emulation(struct kvm *kvm); + +#endif From 1d916229e348c628ddc9cf97528e76d13f52c122 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 7 Jun 2014 00:53:08 +0200 Subject: [PATCH 27/36] arm/arm64: KVM: split GICv2 specific emulation code from vgic.c vgic.c is currently a mixture of generic vGIC emulation code and functions specific to emulating a GICv2. To ease the addition of GICv3, split off strictly v2 specific parts into a new file vgic-v2-emul.c. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall ------- As the diff isn't always obvious here (and to aid eventual rebases), here is a list of high-level changes done to the code: * added new file to respective arm/arm64 Makefiles * moved GICv2 specific functions to vgic-v2-emul.c: - handle_mmio_misc() - handle_mmio_set_enable_reg() - handle_mmio_clear_enable_reg() - handle_mmio_set_pending_reg() - handle_mmio_clear_pending_reg() - handle_mmio_priority_reg() - vgic_get_target_reg() - vgic_set_target_reg() - handle_mmio_target_reg() - handle_mmio_cfg_reg() - handle_mmio_sgi_reg() - vgic_v2_unqueue_sgi() - read_set_clear_sgi_pend_reg() - write_set_clear_sgi_pend_reg() - handle_mmio_sgi_set() - handle_mmio_sgi_clear() - vgic_v2_handle_mmio() - vgic_get_sgi_sources() - vgic_dispatch_sgi() - vgic_v2_queue_sgi() - vgic_v2_map_resources() - vgic_v2_init() - vgic_v2_add_sgi_source() - vgic_v2_init_model() - vgic_v2_init_emulation() - handle_cpu_mmio_misc() - handle_mmio_abpr() - handle_cpu_mmio_ident() - vgic_attr_regs_access() - vgic_create() (renamed to vgic_v2_create()) - vgic_destroy() (renamed to vgic_v2_destroy()) - vgic_has_attr() (renamed to vgic_v2_has_attr()) - vgic_set_attr() (renamed to vgic_v2_set_attr()) - vgic_get_attr() (renamed to vgic_v2_get_attr()) - struct kvm_mmio_range vgic_dist_ranges[] - struct kvm_mmio_range vgic_cpu_ranges[] - struct kvm_device_ops kvm_arm_vgic_v2_ops {} Signed-off-by: Christoffer Dall --- arch/arm/kvm/Makefile | 1 + arch/arm64/kvm/Makefile | 1 + virt/kvm/arm/vgic-v2-emul.c | 847 ++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic.c | 806 +--------------------------------- 4 files changed, 850 insertions(+), 805 deletions(-) create mode 100644 virt/kvm/arm/vgic-v2-emul.c diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index f7057ed045b6..443b8bea43e9 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -22,4 +22,5 @@ obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o +obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 32a096174b94..d9573533a873 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -21,6 +21,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c new file mode 100644 index 000000000000..19c6210f02cf --- /dev/null +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -0,0 +1,847 @@ +/* + * Contains GICv2 specific emulation code, was in vgic.c before. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include "vgic.h" + +#define GICC_ARCH_VERSION_V2 0x2 + +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) +{ + return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; +} + +static bool handle_mmio_misc(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg; + u32 word_offset = offset & 3; + + switch (offset & ~3) { + case 0: /* GICD_CTLR */ + reg = vcpu->kvm->arch.vgic.enabled; + vgic_reg_access(mmio, ®, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vcpu->kvm->arch.vgic.enabled = reg & 1; + vgic_update_state(vcpu->kvm); + return true; + } + break; + + case 4: /* GICD_TYPER */ + reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; + reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; + vgic_reg_access(mmio, ®, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + break; + + case 8: /* GICD_IIDR */ + reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); + vgic_reg_access(mmio, ®, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + break; + } + + return false; +} + +static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id, ACCESS_WRITE_SETBIT); +} + +static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT); +} + +static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + +static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + +static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + return false; +} + +#define GICD_ITARGETSR_SIZE 32 +#define GICD_CPUTARGETS_BITS 8 +#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) +static u32 vgic_get_target_reg(struct kvm *kvm, int irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int i; + u32 val = 0; + + irq -= VGIC_NR_PRIVATE_IRQS; + + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) + val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); + + return val; +} + +static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i, c; + unsigned long *bmap; + u32 target; + + irq -= VGIC_NR_PRIVATE_IRQS; + + /* + * Pick the LSB in each byte. This ensures we target exactly + * one vcpu per IRQ. If the byte is null, assume we target + * CPU0. + */ + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { + int shift = i * GICD_CPUTARGETS_BITS; + + target = ffs((val >> shift) & 0xffU); + target = target ? (target - 1) : 0; + dist->irq_spi_cpu[irq + i] = target; + kvm_for_each_vcpu(c, vcpu, kvm) { + bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); + if (c == target) + set_bit(irq + i, bmap); + else + clear_bit(irq + i, bmap); + } + } +} + +static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + + /* We treat the banked interrupts targets as read-only */ + if (offset < 32) { + u32 roreg; + + roreg = 1 << vcpu->vcpu_id; + roreg |= roreg << 8; + roreg |= roreg << 16; + + vgic_reg_access(mmio, &roreg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 *reg; + + reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + vcpu->vcpu_id, offset >> 1); + + return vgic_handle_cfg_reg(reg, mmio, offset); +} + +static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg; + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vgic_dispatch_sgi(vcpu, reg); + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ +static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int sgi; + int min_sgi = (offset & ~0x3); + int max_sgi = min_sgi + 3; + int vcpu_id = vcpu->vcpu_id; + u32 reg = 0; + + /* Copy source SGIs from distributor side */ + for (sgi = min_sgi; sgi <= max_sgi; sgi++) { + u8 sources = *vgic_get_sgi_sources(dist, vcpu_id, sgi); + + reg |= ((u32)sources) << (8 * (sgi - min_sgi)); + } + + mmio_data_write(mmio, ~0, reg); + return false; +} + +static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, bool set) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int sgi; + int min_sgi = (offset & ~0x3); + int max_sgi = min_sgi + 3; + int vcpu_id = vcpu->vcpu_id; + u32 reg; + bool updated = false; + + reg = mmio_data_read(mmio, ~0); + + /* Clear pending SGIs on the distributor */ + for (sgi = min_sgi; sgi <= max_sgi; sgi++) { + u8 mask = reg >> (8 * (sgi - min_sgi)); + u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); + + if (set) { + if ((*src & mask) != mask) + updated = true; + *src |= mask; + } else { + if (*src & mask) + updated = true; + *src &= ~mask; + } + } + + if (updated) + vgic_update_state(vcpu->kvm); + + return updated; +} + +static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (!mmio->is_write) + return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); + else + return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); +} + +static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (!mmio->is_write) + return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); + else + return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); +} + +static const struct kvm_mmio_range vgic_dist_ranges[] = { + { + .base = GIC_DIST_CTRL, + .len = 12, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_misc, + }, + { + .base = GIC_DIST_IGROUP, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_ENABLE_SET, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_set_enable_reg, + }, + { + .base = GIC_DIST_ENABLE_CLEAR, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_clear_enable_reg, + }, + { + .base = GIC_DIST_PENDING_SET, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_set_pending_reg, + }, + { + .base = GIC_DIST_PENDING_CLEAR, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_clear_pending_reg, + }, + { + .base = GIC_DIST_ACTIVE_SET, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_ACTIVE_CLEAR, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_PRI, + .len = VGIC_MAX_IRQS, + .bits_per_irq = 8, + .handle_mmio = handle_mmio_priority_reg, + }, + { + .base = GIC_DIST_TARGET, + .len = VGIC_MAX_IRQS, + .bits_per_irq = 8, + .handle_mmio = handle_mmio_target_reg, + }, + { + .base = GIC_DIST_CONFIG, + .len = VGIC_MAX_IRQS / 4, + .bits_per_irq = 2, + .handle_mmio = handle_mmio_cfg_reg, + }, + { + .base = GIC_DIST_SOFTINT, + .len = 4, + .handle_mmio = handle_mmio_sgi_reg, + }, + { + .base = GIC_DIST_SGI_PENDING_CLEAR, + .len = VGIC_NR_SGIS, + .handle_mmio = handle_mmio_sgi_clear, + }, + { + .base = GIC_DIST_SGI_PENDING_SET, + .len = VGIC_NR_SGIS, + .handle_mmio = handle_mmio_sgi_set, + }, + {} +}; + +static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; + + if (!is_in_range(mmio->phys_addr, mmio->len, base, + KVM_VGIC_V2_DIST_SIZE)) + return false; + + /* GICv2 does not support accesses wider than 32 bits */ + if (mmio->len > 4) { + kvm_inject_dabt(vcpu, mmio->phys_addr); + return true; + } + + return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); +} + +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *dist = &kvm->arch.vgic; + int nrcpus = atomic_read(&kvm->online_vcpus); + u8 target_cpus; + int sgi, mode, c, vcpu_id; + + vcpu_id = vcpu->vcpu_id; + + sgi = reg & 0xf; + target_cpus = (reg >> 16) & 0xff; + mode = (reg >> 24) & 3; + + switch (mode) { + case 0: + if (!target_cpus) + return; + break; + + case 1: + target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; + break; + + case 2: + target_cpus = 1 << vcpu_id; + break; + } + + kvm_for_each_vcpu(c, vcpu, kvm) { + if (target_cpus & 1) { + /* Flag the SGI as pending */ + vgic_dist_irq_set_pending(vcpu, sgi); + *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; + kvm_debug("SGI%d from CPU%d to CPU%d\n", + sgi, vcpu_id, c); + } + + target_cpus >>= 1; + } +} + +static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long sources; + int vcpu_id = vcpu->vcpu_id; + int c; + + sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); + + for_each_set_bit(c, &sources, dist->nr_cpus) { + if (vgic_queue_irq(vcpu, c, irq)) + clear_bit(c, &sources); + } + + *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; + + /* + * If the sources bitmap has been cleared it means that we + * could queue all the SGIs onto link registers (see the + * clear_bit above), and therefore we are done with them in + * our emulated gic and can get rid of them. + */ + if (!sources) { + vgic_dist_irq_clear_pending(vcpu, irq); + vgic_cpu_irq_clear(vcpu, irq); + return true; + } + + return false; +} + +/** + * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs + * @kvm: pointer to the kvm struct + * + * Map the virtual CPU interface into the VM before running any VCPUs. We + * can't do this at creation time, because user space must first set the + * virtual CPU interface address in the guest physical address space. + */ +static int vgic_v2_map_resources(struct kvm *kvm, + const struct vgic_params *params) +{ + int ret = 0; + + if (!irqchip_in_kernel(kvm)) + return 0; + + mutex_lock(&kvm->lock); + + if (vgic_ready(kvm)) + goto out; + + if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { + kvm_err("Need to set vgic cpu and dist addresses first\n"); + ret = -ENXIO; + goto out; + } + + /* + * Initialize the vgic if this hasn't already been done on demand by + * accessing the vgic state from userspace. + */ + ret = vgic_init(kvm); + if (ret) { + kvm_err("Unable to allocate maps\n"); + goto out; + } + + ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, + params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, + true); + if (ret) { + kvm_err("Unable to remap VGIC CPU to VCPU\n"); + goto out; + } + + kvm->arch.vgic.ready = true; +out: + if (ret) + kvm_vgic_destroy(kvm); + mutex_unlock(&kvm->lock); + return ret; +} + +static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source; +} + +static int vgic_v2_init_model(struct kvm *kvm) +{ + int i; + + for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) + vgic_set_target_reg(kvm, 0, i); + + return 0; +} + +void vgic_v2_init_emulation(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; + dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; + dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; + dist->vm_ops.init_model = vgic_v2_init_model; + dist->vm_ops.map_resources = vgic_v2_map_resources; + + kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; +} + +static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + bool updated = false; + struct vgic_vmcr vmcr; + u32 *vmcr_field; + u32 reg; + + vgic_get_vmcr(vcpu, &vmcr); + + switch (offset & ~0x3) { + case GIC_CPU_CTRL: + vmcr_field = &vmcr.ctlr; + break; + case GIC_CPU_PRIMASK: + vmcr_field = &vmcr.pmr; + break; + case GIC_CPU_BINPOINT: + vmcr_field = &vmcr.bpr; + break; + case GIC_CPU_ALIAS_BINPOINT: + vmcr_field = &vmcr.abpr; + break; + default: + BUG(); + } + + if (!mmio->is_write) { + reg = *vmcr_field; + mmio_data_write(mmio, ~0, reg); + } else { + reg = mmio_data_read(mmio, ~0); + if (reg != *vmcr_field) { + *vmcr_field = reg; + vgic_set_vmcr(vcpu, &vmcr); + updated = true; + } + } + return updated; +} + +static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); +} + +static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + + if (mmio->is_write) + return false; + + /* GICC_IIDR */ + reg = (PRODUCT_ID_KVM << 20) | + (GICC_ARCH_VERSION_V2 << 16) | + (IMPLEMENTER_ARM << 0); + mmio_data_write(mmio, ~0, reg); + return false; +} + +/* + * CPU Interface Register accesses - these are not accessed by the VM, but by + * user space for saving and restoring VGIC state. + */ +static const struct kvm_mmio_range vgic_cpu_ranges[] = { + { + .base = GIC_CPU_CTRL, + .len = 12, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_ALIAS_BINPOINT, + .len = 4, + .handle_mmio = handle_mmio_abpr, + }, + { + .base = GIC_CPU_ACTIVEPRIO, + .len = 16, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_CPU_IDENT, + .len = 4, + .handle_mmio = handle_cpu_mmio_ident, + }, +}; + +static int vgic_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u32 *reg, bool is_write) +{ + const struct kvm_mmio_range *r = NULL, *ranges; + phys_addr_t offset; + int ret, cpuid, c; + struct kvm_vcpu *vcpu, *tmp_vcpu; + struct vgic_dist *vgic; + struct kvm_exit_mmio mmio; + + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> + KVM_DEV_ARM_VGIC_CPUID_SHIFT; + + mutex_lock(&dev->kvm->lock); + + ret = vgic_init(dev->kvm); + if (ret) + goto out; + + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { + ret = -EINVAL; + goto out; + } + + vcpu = kvm_get_vcpu(dev->kvm, cpuid); + vgic = &dev->kvm->arch.vgic; + + mmio.len = 4; + mmio.is_write = is_write; + if (is_write) + mmio_data_write(&mmio, ~0, *reg); + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + mmio.phys_addr = vgic->vgic_dist_base + offset; + ranges = vgic_dist_ranges; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + mmio.phys_addr = vgic->vgic_cpu_base + offset; + ranges = vgic_cpu_ranges; + break; + default: + BUG(); + } + r = vgic_find_range(ranges, &mmio, offset); + + if (unlikely(!r || !r->handle_mmio)) { + ret = -ENXIO; + goto out; + } + + + spin_lock(&vgic->lock); + + /* + * Ensure that no other VCPU is running by checking the vcpu->cpu + * field. If no other VPCUs are running we can safely access the VGIC + * state, because even if another VPU is run after this point, that + * VCPU will not touch the vgic state, because it will block on + * getting the vgic->lock in kvm_vgic_sync_hwstate(). + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { + if (unlikely(tmp_vcpu->cpu != -1)) { + ret = -EBUSY; + goto out_vgic_unlock; + } + } + + /* + * Move all pending IRQs from the LRs on all VCPUs so the pending + * state can be properly represented in the register state accessible + * through this API. + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) + vgic_unqueue_irqs(tmp_vcpu); + + offset -= r->base; + r->handle_mmio(vcpu, &mmio, offset); + + if (!is_write) + *reg = mmio_data_read(&mmio, ~0); + + ret = 0; +out_vgic_unlock: + spin_unlock(&vgic->lock); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static int vgic_v2_create(struct kvm_device *dev, u32 type) +{ + return kvm_vgic_create(dev->kvm, type); +} + +static void vgic_v2_destroy(struct kvm_device *dev) +{ + kfree(dev); +} + +static int vgic_v2_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_set_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_attr_regs_access(dev, attr, ®, true); + } + + } + + return -ENXIO; +} + +static int vgic_v2_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_get_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg = 0; + + ret = vgic_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + + } + + return -ENXIO; +} + +static int vgic_v2_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + phys_addr_t offset; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + case KVM_VGIC_V2_ADDR_TYPE_CPU: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + return vgic_has_attr_regs(vgic_dist_ranges, offset); + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + return vgic_has_attr_regs(vgic_cpu_ranges, offset); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + } + } + return -ENXIO; +} + +struct kvm_device_ops kvm_arm_vgic_v2_ops = { + .name = "kvm-arm-vgic-v2", + .create = vgic_v2_create, + .destroy = vgic_v2_destroy, + .set_attr = vgic_v2_set_attr, + .get_attr = vgic_v2_get_attr, + .has_attr = vgic_v2_has_attr, +}; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index ce6c998a3f8d..0e84292c2197 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -77,12 +77,8 @@ #include "vgic.h" -#define GICC_ARCH_VERSION_V2 0x2 - static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); -static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); @@ -421,41 +417,6 @@ void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, } } -static bool handle_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - u32 word_offset = offset & 3; - - switch (offset & ~3) { - case 0: /* GICD_CTLR */ - reg = vcpu->kvm->arch.vgic.enabled; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vcpu->kvm->arch.vgic.enabled = reg & 1; - vgic_update_state(vcpu->kvm); - return true; - } - break; - - case 4: /* GICD_TYPER */ - reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - - case 8: /* GICD_IIDR */ - reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - } - - return false; -} - bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { @@ -486,22 +447,6 @@ bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, return false; } -static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, ACCESS_WRITE_SETBIT); -} - -static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT); -} - bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id) @@ -575,109 +520,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, return false; } -static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -#define GICD_ITARGETSR_SIZE 32 -#define GICD_CPUTARGETS_BITS 8 -#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) -static u32 vgic_get_target_reg(struct kvm *kvm, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - int i; - u32 val = 0; - - irq -= VGIC_NR_PRIVATE_IRQS; - - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); - - return val; -} - -static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; - u32 target; - - irq -= VGIC_NR_PRIVATE_IRQS; - - /* - * Pick the LSB in each byte. This ensures we target exactly - * one vcpu per IRQ. If the byte is null, assume we target - * CPU0. - */ - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { - int shift = i * GICD_CPUTARGETS_BITS; - target = ffs((val >> shift) & 0xffU); - target = target ? (target - 1) : 0; - dist->irq_spi_cpu[irq + i] = target; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - if (c == target) - set_bit(irq + i, bmap); - else - clear_bit(irq + i, bmap); - } - } -} - -static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - /* We treat the banked interrupts targets as read-only */ - if (offset < 32) { - u32 roreg = 1 << vcpu->vcpu_id; - roreg |= roreg << 8; - roreg |= roreg << 16; - - vgic_reg_access(mmio, &roreg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - static u32 vgic_cfg_expand(u16 val) { u32 res = 0; @@ -745,39 +587,6 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, return false; } -static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 *reg; - - reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); - - return vgic_handle_cfg_reg(reg, mmio, offset); -} - -static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_dispatch_sgi(vcpu, reg); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source; -} - /** * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs @@ -838,168 +647,6 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } } -/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ -static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int sgi; - int min_sgi = (offset & ~0x3); - int max_sgi = min_sgi + 3; - int vcpu_id = vcpu->vcpu_id; - u32 reg = 0; - - /* Copy source SGIs from distributor side */ - for (sgi = min_sgi; sgi <= max_sgi; sgi++) { - int shift = 8 * (sgi - min_sgi); - reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift; - } - - mmio_data_write(mmio, ~0, reg); - return false; -} - -static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, bool set) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int sgi; - int min_sgi = (offset & ~0x3); - int max_sgi = min_sgi + 3; - int vcpu_id = vcpu->vcpu_id; - u32 reg; - bool updated = false; - - reg = mmio_data_read(mmio, ~0); - - /* Clear pending SGIs on the distributor */ - for (sgi = min_sgi; sgi <= max_sgi; sgi++) { - u8 mask = reg >> (8 * (sgi - min_sgi)); - u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); - if (set) { - if ((*src & mask) != mask) - updated = true; - *src |= mask; - } else { - if (*src & mask) - updated = true; - *src &= ~mask; - } - } - - if (updated) - vgic_update_state(vcpu->kvm); - - return updated; -} - -static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (!mmio->is_write) - return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); - else - return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); -} - -static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (!mmio->is_write) - return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); - else - return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); -} - -static const struct kvm_mmio_range vgic_dist_ranges[] = { - { - .base = GIC_DIST_CTRL, - .len = 12, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_misc, - }, - { - .base = GIC_DIST_IGROUP, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ENABLE_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_enable_reg, - }, - { - .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_enable_reg, - }, - { - .base = GIC_DIST_PENDING_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_pending_reg, - }, - { - .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_pending_reg, - }, - { - .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_PRI, - .len = VGIC_MAX_IRQS, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_priority_reg, - }, - { - .base = GIC_DIST_TARGET, - .len = VGIC_MAX_IRQS, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_target_reg, - }, - { - .base = GIC_DIST_CONFIG, - .len = VGIC_MAX_IRQS / 4, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_cfg_reg, - }, - { - .base = GIC_DIST_SOFTINT, - .len = 4, - .handle_mmio = handle_mmio_sgi_reg, - }, - { - .base = GIC_DIST_SGI_PENDING_CLEAR, - .len = VGIC_NR_SGIS, - .handle_mmio = handle_mmio_sgi_clear, - }, - { - .base = GIC_DIST_SGI_PENDING_SET, - .len = VGIC_NR_SGIS, - .handle_mmio = handle_mmio_sgi_set, - }, - {} -}; - const struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, struct kvm_exit_mmio *mmio, @@ -1127,24 +774,6 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } -static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; - - if (!is_in_range(mmio->phys_addr, mmio->len, base, - KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* GICv2 does not support accesses wider than 32 bits */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); -} - /** * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation * @vcpu: pointer to the vcpu performing the access @@ -1169,52 +798,6 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); } -static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) -{ - return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; -} - -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *dist = &kvm->arch.vgic; - int nrcpus = atomic_read(&kvm->online_vcpus); - u8 target_cpus; - int sgi, mode, c, vcpu_id; - - vcpu_id = vcpu->vcpu_id; - - sgi = reg & 0xf; - target_cpus = (reg >> 16) & 0xff; - mode = (reg >> 24) & 3; - - switch (mode) { - case 0: - if (!target_cpus) - return; - break; - - case 1: - target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; - break; - - case 2: - target_cpus = 1 << vcpu_id; - break; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (target_cpus & 1) { - /* Flag the SGI as pending */ - vgic_dist_irq_set_pending(vcpu, sgi); - *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; - kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); - } - - target_cpus >>= 1; - } -} - static int vgic_nr_shared_irqs(struct vgic_dist *dist) { return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; @@ -1368,6 +951,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) /* * Queue an interrupt to a CPU virtual interface. Return true on success, * or false if it wasn't possible to queue it. + * sgi_source must be zero for any non-SGI interrupts. */ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { @@ -1418,37 +1002,6 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return true; } -static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long sources; - int vcpu_id = vcpu->vcpu_id; - int c; - - sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); - - for_each_set_bit(c, &sources, dist->nr_cpus) { - if (vgic_queue_irq(vcpu, c, irq)) - clear_bit(c, &sources); - } - - *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; - - /* - * If the sources bitmap has been cleared it means that we - * could queue all the SGIs onto link registers (see the - * clear_bit above), and therefore we are done with them in - * our emulated gic and can get rid of them. - */ - if (!sources) { - vgic_dist_irq_clear_pending(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - return true; - } - - return false; -} - static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) { if (!vgic_can_sample_irq(vcpu, irq)) @@ -1894,16 +1447,6 @@ void kvm_vgic_destroy(struct kvm *kvm) dist->nr_cpus = 0; } -static int vgic_v2_init_model(struct kvm *kvm) -{ - int i; - - for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); - - return 0; -} - /* * Allocate and initialize the various data structures. Must be called * with kvm->lock held! @@ -1994,73 +1537,6 @@ out: return ret; } -/** - * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs - * @kvm: pointer to the kvm struct - * - * Map the virtual CPU interface into the VM before running any VCPUs. We - * can't do this at creation time, because user space must first set the - * virtual CPU interface address in the guest physical address space. - */ -static int vgic_v2_map_resources(struct kvm *kvm, - const struct vgic_params *params) -{ - int ret = 0; - - if (!irqchip_in_kernel(kvm)) - return 0; - - mutex_lock(&kvm->lock); - - if (vgic_ready(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { - kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; - } - - /* - * Initialize the vgic if this hasn't already been done on demand by - * accessing the vgic state from userspace. - */ - ret = vgic_init(kvm); - if (ret) { - kvm_err("Unable to allocate maps\n"); - goto out; - } - - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, - true); - if (ret) { - kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; - } - - kvm->arch.vgic.ready = true; -out: - if (ret) - kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); - return ret; -} - -void vgic_v2_init_emulation(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; - dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; - dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; - dist->vm_ops.init_model = vgic_v2_init_model; - dist->vm_ops.map_resources = vgic_v2_map_resources; - - kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; -} - static int init_vgic_model(struct kvm *kvm, int type) { switch (type) { @@ -2210,188 +1686,6 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) return r; } -static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - bool updated = false; - struct vgic_vmcr vmcr; - u32 *vmcr_field; - u32 reg; - - vgic_get_vmcr(vcpu, &vmcr); - - switch (offset & ~0x3) { - case GIC_CPU_CTRL: - vmcr_field = &vmcr.ctlr; - break; - case GIC_CPU_PRIMASK: - vmcr_field = &vmcr.pmr; - break; - case GIC_CPU_BINPOINT: - vmcr_field = &vmcr.bpr; - break; - case GIC_CPU_ALIAS_BINPOINT: - vmcr_field = &vmcr.abpr; - break; - default: - BUG(); - } - - if (!mmio->is_write) { - reg = *vmcr_field; - mmio_data_write(mmio, ~0, reg); - } else { - reg = mmio_data_read(mmio, ~0); - if (reg != *vmcr_field) { - *vmcr_field = reg; - vgic_set_vmcr(vcpu, &vmcr); - updated = true; - } - } - return updated; -} - -static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); -} - -static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - if (mmio->is_write) - return false; - - /* GICC_IIDR */ - reg = (PRODUCT_ID_KVM << 20) | - (GICC_ARCH_VERSION_V2 << 16) | - (IMPLEMENTER_ARM << 0); - mmio_data_write(mmio, ~0, reg); - return false; -} - -/* - * CPU Interface Register accesses - these are not accessed by the VM, but by - * user space for saving and restoring VGIC state. - */ -static const struct kvm_mmio_range vgic_cpu_ranges[] = { - { - .base = GIC_CPU_CTRL, - .len = 12, - .handle_mmio = handle_cpu_mmio_misc, - }, - { - .base = GIC_CPU_ALIAS_BINPOINT, - .len = 4, - .handle_mmio = handle_mmio_abpr, - }, - { - .base = GIC_CPU_ACTIVEPRIO, - .len = 16, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_CPU_IDENT, - .len = 4, - .handle_mmio = handle_cpu_mmio_ident, - }, -}; - -static int vgic_attr_regs_access(struct kvm_device *dev, - struct kvm_device_attr *attr, - u32 *reg, bool is_write) -{ - const struct kvm_mmio_range *r = NULL, *ranges; - phys_addr_t offset; - int ret, cpuid, c; - struct kvm_vcpu *vcpu, *tmp_vcpu; - struct vgic_dist *vgic; - struct kvm_exit_mmio mmio; - - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> - KVM_DEV_ARM_VGIC_CPUID_SHIFT; - - mutex_lock(&dev->kvm->lock); - - ret = vgic_init(dev->kvm); - if (ret) - goto out; - - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { - ret = -EINVAL; - goto out; - } - - vcpu = kvm_get_vcpu(dev->kvm, cpuid); - vgic = &dev->kvm->arch.vgic; - - mmio.len = 4; - mmio.is_write = is_write; - if (is_write) - mmio_data_write(&mmio, ~0, *reg); - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - mmio.phys_addr = vgic->vgic_dist_base + offset; - ranges = vgic_dist_ranges; - break; - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - mmio.phys_addr = vgic->vgic_cpu_base + offset; - ranges = vgic_cpu_ranges; - break; - default: - BUG(); - } - r = vgic_find_range(ranges, &mmio, offset); - - if (unlikely(!r || !r->handle_mmio)) { - ret = -ENXIO; - goto out; - } - - - spin_lock(&vgic->lock); - - /* - * Ensure that no other VCPU is running by checking the vcpu->cpu - * field. If no other VPCUs are running we can safely access the VGIC - * state, because even if another VPU is run after this point, that - * VCPU will not touch the vgic state, because it will block on - * getting the vgic->lock in kvm_vgic_sync_hwstate(). - */ - kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { - if (unlikely(tmp_vcpu->cpu != -1)) { - ret = -EBUSY; - goto out_vgic_unlock; - } - } - - /* - * Move all pending IRQs from the LRs on all VCPUs so the pending - * state can be properly represented in the register state accessible - * through this API. - */ - kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) - vgic_unqueue_irqs(tmp_vcpu); - - offset -= r->base; - r->handle_mmio(vcpu, &mmio, offset); - - if (!is_write) - *reg = mmio_data_read(&mmio, ~0); - - ret = 0; -out_vgic_unlock: - spin_unlock(&vgic->lock); -out: - mutex_unlock(&dev->kvm->lock); - return ret; -} - int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r; @@ -2451,31 +1745,6 @@ int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return -ENXIO; } -static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_attr_regs_access(dev, attr, ®, true); - } - - } - - return -ENXIO; -} - int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = -ENXIO; @@ -2506,31 +1775,6 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return r; } -static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg = 0; - - ret = vgic_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - return put_user(reg, uaddr); - } - - } - - return -ENXIO; -} - int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset) { struct kvm_exit_mmio dev_attr_mmio; @@ -2542,54 +1786,6 @@ int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset) return -ENXIO; } -static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) -{ - phys_addr_t offset; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: - switch (attr->attr) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - case KVM_VGIC_V2_ADDR_TYPE_CPU: - return 0; - } - break; - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - return vgic_has_attr_regs(vgic_dist_ranges, offset); - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - return vgic_has_attr_regs(vgic_cpu_ranges, offset); - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: - return 0; - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - return 0; - } - } - return -ENXIO; -} - -void vgic_destroy(struct kvm_device *dev) -{ - kfree(dev); -} - -int vgic_create(struct kvm_device *dev, u32 type) -{ - return kvm_vgic_create(dev->kvm, type); -} - -struct kvm_device_ops kvm_arm_vgic_v2_ops = { - .name = "kvm-arm-vgic", - .create = vgic_create, - .destroy = vgic_destroy, - .set_attr = vgic_set_attr, - .get_attr = vgic_get_attr, - .has_attr = vgic_has_attr, -}; - static void vgic_init_maintenance_interrupt(void *info) { enable_percpu_irq(vgic->maint_irq, 0); From 9fedf146778e6d1c26319ebaf56131a4f3a6be03 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 13 Nov 2014 16:21:35 +0000 Subject: [PATCH 28/36] arm/arm64: KVM: add opaque private pointer to MMIO data For a GICv2 there is always only one (v)CPU involved: the one that does the access. On a GICv3 the access to a CPU redistributor is memory-mapped, but not banked, so the (v)CPU affected is determined by looking at the MMIO address region being accessed. To allow passing the affected CPU into the accessors later, extend struct kvm_exit_mmio to add an opaque private pointer parameter. The current GICv2 emulation just does not use it. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmio.h | 1 + arch/arm64/include/asm/kvm_mmio.h | 1 + virt/kvm/arm/vgic.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index adcc0d7d3175..3f83db2f6cf0 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -37,6 +37,7 @@ struct kvm_exit_mmio { u8 data[8]; u32 len; bool is_write; + void *private; }; static inline void kvm_prepare_mmio(struct kvm_run *run, diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index fc2f689c0694..9f52beb7cb13 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -40,6 +40,7 @@ struct kvm_exit_mmio { u8 data[8]; u32 len; bool is_write; + void *private; }; static inline void kvm_prepare_mmio(struct kvm_run *run, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 0e84292c2197..b6e17c886ce2 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -709,6 +709,7 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, mmio32.len = 4; mmio32.is_write = mmio->is_write; + mmio32.private = mmio->private; mmio32.phys_addr = mmio->phys_addr + 4; if (mmio->is_write) From a0675c25d6392c2197b796a60c4a2a0138c86355 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 7 Jun 2014 00:54:51 +0200 Subject: [PATCH 29/36] arm/arm64: KVM: add virtual GICv3 distributor emulation With everything separated and prepared, we implement a model of a GICv3 distributor and redistributors by using the existing framework to provide handler functions for each register group. Currently we limit the emulation to a model enforcing a single security state, with SRE==1 (forcing system register access) and ARE==1 (allowing more than 8 VCPUs). We share some of the functions provided for GICv2 emulation, but take the different ways of addressing (v)CPUs into account. Save and restore is currently not implemented. Similar to the split-off of the GICv2 specific code, the new emulation code goes into a new file (vgic-v3-emul.c). Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall --- arch/arm64/kvm/Makefile | 1 + include/kvm/arm_vgic.h | 9 +- include/linux/irqchip/arm-gic-v3.h | 32 + include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 2 + virt/kvm/arm/vgic-v3-emul.c | 922 +++++++++++++++++++++++++++++ virt/kvm/arm/vgic.c | 11 +- virt/kvm/arm/vgic.h | 3 + 8 files changed, 978 insertions(+), 3 deletions(-) create mode 100644 virt/kvm/arm/vgic-v3-emul.c diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index d9573533a873..4e6e09ee4033 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -24,5 +24,6 @@ kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ff04afd0d901..98c30168bce4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -162,7 +162,11 @@ struct vgic_dist { /* Distributor and vcpu interface mapping in the guest */ phys_addr_t vgic_dist_base; - phys_addr_t vgic_cpu_base; + /* GICv2 and GICv3 use different mapped register blocks */ + union { + phys_addr_t vgic_cpu_base; + phys_addr_t vgic_redist_base; + }; /* Distributor enabled */ u32 enabled; @@ -224,6 +228,9 @@ struct vgic_dist { */ struct vgic_bitmap *irq_spi_target; + /* Target MPIDR for each IRQ (needed for GICv3 IROUTERn) only */ + u32 *irq_spi_mpidr; + /* Bitmap indicating which CPU has something pending */ unsigned long *irq_pending_on_cpu; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1e8b0cf30792..3fb4d8588a26 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -33,6 +33,7 @@ #define GICD_SETSPI_SR 0x0050 #define GICD_CLRSPI_SR 0x0058 #define GICD_SEIR 0x0068 +#define GICD_IGROUPR 0x0080 #define GICD_ISENABLER 0x0100 #define GICD_ICENABLER 0x0180 #define GICD_ISPENDR 0x0200 @@ -41,14 +42,37 @@ #define GICD_ICACTIVER 0x0380 #define GICD_IPRIORITYR 0x0400 #define GICD_ICFGR 0x0C00 +#define GICD_IGRPMODR 0x0D00 +#define GICD_NSACR 0x0E00 #define GICD_IROUTER 0x6000 +#define GICD_IDREGS 0xFFD0 #define GICD_PIDR2 0xFFE8 +/* + * Those registers are actually from GICv2, but the spec demands that they + * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). + */ +#define GICD_ITARGETSR 0x0800 +#define GICD_SGIR 0x0F00 +#define GICD_CPENDSGIR 0x0F10 +#define GICD_SPENDSGIR 0x0F20 + #define GICD_CTLR_RWP (1U << 31) +#define GICD_CTLR_DS (1U << 6) #define GICD_CTLR_ARE_NS (1U << 4) #define GICD_CTLR_ENABLE_G1A (1U << 1) #define GICD_CTLR_ENABLE_G1 (1U << 0) +/* + * In systems with a single security state (what we emulate in KVM) + * the meaning of the interrupt group enable bits is slightly different + */ +#define GICD_CTLR_ENABLE_SS_G1 (1U << 1) +#define GICD_CTLR_ENABLE_SS_G0 (1U << 0) + +#define GICD_TYPER_LPIS (1U << 17) +#define GICD_TYPER_MBIS (1U << 16) + #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) #define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) #define GICD_TYPER_LPIS (1U << 17) @@ -60,6 +84,8 @@ #define GIC_PIDR2_ARCH_GICv3 0x30 #define GIC_PIDR2_ARCH_GICv4 0x40 +#define GIC_V3_DIST_SIZE 0x10000 + /* * Re-Distributor registers, offsets from RD_base */ @@ -78,6 +104,7 @@ #define GICR_SYNCR 0x00C0 #define GICR_MOVLPIR 0x0100 #define GICR_MOVALLR 0x0110 +#define GICR_IDREGS GICD_IDREGS #define GICR_PIDR2 GICD_PIDR2 #define GICR_CTLR_ENABLE_LPIS (1UL << 0) @@ -104,6 +131,7 @@ /* * Re-Distributor registers, offsets from SGI_base */ +#define GICR_IGROUPR0 GICD_IGROUPR #define GICR_ISENABLER0 GICD_ISENABLER #define GICR_ICENABLER0 GICD_ICENABLER #define GICR_ISPENDR0 GICD_ISPENDR @@ -112,11 +140,15 @@ #define GICR_ICACTIVER0 GICD_ICACTIVER #define GICR_IPRIORITYR0 GICD_IPRIORITYR #define GICR_ICFGR0 GICD_ICFGR +#define GICR_IGRPMODR0 GICD_IGRPMODR +#define GICR_NSACR GICD_NSACR #define GICR_TYPER_PLPIS (1U << 0) #define GICR_TYPER_VLPIS (1U << 1) #define GICR_TYPER_LAST (1U << 4) +#define GIC_V3_REDIST_SIZE 0x20000 + #define LPI_PROP_GROUP1 (1 << 1) #define LPI_PROP_ENABLED (1 << 0) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 25d7ce31a5d4..0ef2daa199d8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1052,6 +1052,7 @@ void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; +extern struct kvm_device_ops kvm_arm_vgic_v3_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a37fd1224f36..b4e6f1e70f03 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -952,6 +952,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_FLIC, #define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC + KVM_DEV_TYPE_ARM_VGIC_V3, +#define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_MAX, }; diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c new file mode 100644 index 000000000000..8db1db597223 --- /dev/null +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -0,0 +1,922 @@ +/* + * GICv3 distributor and redistributor emulation + * + * GICv3 emulation is currently only supported on a GICv3 host (because + * we rely on the hardware's CPU interface virtualization support), but + * supports both hardware with or without the optional GICv2 backwards + * compatibility features. + * + * Limitations of the emulation: + * (RAZ/WI: read as zero, write ignore, RAO/WI: read as one, write ignore) + * - We do not support LPIs (yet). TYPER.LPIS is reported as 0 and is RAZ/WI. + * - We do not support the message based interrupts (MBIs) triggered by + * writes to the GICD_{SET,CLR}SPI_* registers. TYPER.MBIS is reported as 0. + * - We do not support the (optional) backwards compatibility feature. + * GICD_CTLR.ARE resets to 1 and is RAO/WI. If the _host_ GIC supports + * the compatiblity feature, you can use a GICv2 in the guest, though. + * - We only support a single security state. GICD_CTLR.DS is 1 and is RAO/WI. + * - Priorities are not emulated (same as the GICv2 emulation). Linux + * as a guest is fine with this, because it does not use priorities. + * - We only support Group1 interrupts. Again Linux uses only those. + * + * Copyright (C) 2014 ARM Ltd. + * Author: Andre Przywara + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "vgic.h" + +static bool handle_mmio_rao_wi(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg = 0xffffffff; + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + + return false; +} + +static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg = 0; + + /* + * Force ARE and DS to 1, the guest cannot change this. + * For the time being we only support Group1 interrupts. + */ + if (vcpu->kvm->arch.vgic.enabled) + reg = GICD_CTLR_ENABLE_SS_G1; + reg |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + if (reg & GICD_CTLR_ENABLE_SS_G0) + kvm_info("guest tried to enable unsupported Group0 interrupts\n"); + vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1); + vgic_update_state(vcpu->kvm); + return true; + } + return false; +} + +/* + * As this implementation does not provide compatibility + * with GICv2 (ARE==1), we report zero CPUs in bits [5..7]. + * Also LPIs and MBIs are not supported, so we set the respective bits to 0. + * Also we report at most 2**10=1024 interrupt IDs (to match 1024 SPIs). + */ +#define INTERRUPT_ID_BITS 10 +static bool handle_mmio_typer(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg; + + reg = (min(vcpu->kvm->arch.vgic.nr_irqs, 1024) >> 5) - 1; + + reg |= (INTERRUPT_ID_BITS - 1) << 19; + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + + return false; +} + +static bool handle_mmio_iidr(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg; + + reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + + return false; +} + +static bool handle_mmio_set_enable_reg_dist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) + return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id, + ACCESS_WRITE_SETBIT); + + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_clear_enable_reg_dist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) + return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id, + ACCESS_WRITE_CLEARBIT); + + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_set_pending_reg_dist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) + return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); + + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) + return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); + + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg; + + if (unlikely(offset < VGIC_NR_PRIVATE_IRQS)) { + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; + } + + reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + return false; +} + +static bool handle_mmio_cfg_reg_dist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg; + + if (unlikely(offset < VGIC_NR_PRIVATE_IRQS / 4)) { + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; + } + + reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + vcpu->vcpu_id, offset >> 1); + + return vgic_handle_cfg_reg(reg, mmio, offset); +} + +/* + * We use a compressed version of the MPIDR (all 32 bits in one 32-bit word) + * when we store the target MPIDR written by the guest. + */ +static u32 compress_mpidr(unsigned long mpidr) +{ + u32 ret; + + ret = MPIDR_AFFINITY_LEVEL(mpidr, 0); + ret |= MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8; + ret |= MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16; + ret |= MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24; + + return ret; +} + +static unsigned long uncompress_mpidr(u32 value) +{ + unsigned long mpidr; + + mpidr = ((value >> 0) & 0xFF) << MPIDR_LEVEL_SHIFT(0); + mpidr |= ((value >> 8) & 0xFF) << MPIDR_LEVEL_SHIFT(1); + mpidr |= ((value >> 16) & 0xFF) << MPIDR_LEVEL_SHIFT(2); + mpidr |= (u64)((value >> 24) & 0xFF) << MPIDR_LEVEL_SHIFT(3); + + return mpidr; +} + +/* + * Lookup the given MPIDR value to get the vcpu_id (if there is one) + * and store that in the irq_spi_cpu[] array. + * This limits the number of VCPUs to 255 for now, extending the data + * type (or storing kvm_vcpu pointers) should lift the limit. + * Store the original MPIDR value in an extra array to support read-as-written. + * Unallocated MPIDRs are translated to a special value and caught + * before any array accesses. + */ +static bool handle_mmio_route_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *dist = &kvm->arch.vgic; + int spi; + u32 reg; + int vcpu_id; + unsigned long *bmap, mpidr; + + /* + * The upper 32 bits of each 64 bit register are zero, + * as we don't support Aff3. + */ + if ((offset & 4)) { + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; + } + + /* This region only covers SPIs, so no handling of private IRQs here. */ + spi = offset / 8; + + /* get the stored MPIDR for this IRQ */ + mpidr = uncompress_mpidr(dist->irq_spi_mpidr[spi]); + reg = mpidr; + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + + if (!mmio->is_write) + return false; + + /* + * Now clear the currently assigned vCPU from the map, making room + * for the new one to be written below + */ + vcpu = kvm_mpidr_to_vcpu(kvm, mpidr); + if (likely(vcpu)) { + vcpu_id = vcpu->vcpu_id; + bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]); + __clear_bit(spi, bmap); + } + + dist->irq_spi_mpidr[spi] = compress_mpidr(reg); + vcpu = kvm_mpidr_to_vcpu(kvm, reg & MPIDR_HWID_BITMASK); + + /* + * The spec says that non-existent MPIDR values should not be + * forwarded to any existent (v)CPU, but should be able to become + * pending anyway. We simply keep the irq_spi_target[] array empty, so + * the interrupt will never be injected. + * irq_spi_cpu[irq] gets a magic value in this case. + */ + if (likely(vcpu)) { + vcpu_id = vcpu->vcpu_id; + dist->irq_spi_cpu[spi] = vcpu_id; + bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]); + __set_bit(spi, bmap); + } else { + dist->irq_spi_cpu[spi] = VCPU_NOT_ALLOCATED; + } + + vgic_update_state(kvm); + + return true; +} + +/* + * We should be careful about promising too much when a guest reads + * this register. Don't claim to be like any hardware implementation, + * but just report the GIC as version 3 - which is what a Linux guest + * would check. + */ +static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg = 0; + + switch (offset + GICD_IDREGS) { + case GICD_PIDR2: + reg = 0x3b; + break; + } + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + + return false; +} + +static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { + { + .base = GICD_CTLR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_ctlr, + }, + { + .base = GICD_TYPER, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_typer, + }, + { + .base = GICD_IIDR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_iidr, + }, + { + /* this register is optional, it is RAZ/WI if not implemented */ + .base = GICD_STATUSR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, + { + /* this write only register is WI when TYPER.MBIS=0 */ + .base = GICD_SETSPI_NSR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, + { + /* this write only register is WI when TYPER.MBIS=0 */ + .base = GICD_CLRSPI_NSR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, + { + /* this is RAZ/WI when DS=1 */ + .base = GICD_SETSPI_SR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, + { + /* this is RAZ/WI when DS=1 */ + .base = GICD_CLRSPI_SR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICD_IGROUPR, + .len = 0x80, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_rao_wi, + }, + { + .base = GICD_ISENABLER, + .len = 0x80, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_set_enable_reg_dist, + }, + { + .base = GICD_ICENABLER, + .len = 0x80, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_clear_enable_reg_dist, + }, + { + .base = GICD_ISPENDR, + .len = 0x80, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_set_pending_reg_dist, + }, + { + .base = GICD_ICPENDR, + .len = 0x80, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_clear_pending_reg_dist, + }, + { + .base = GICD_ISACTIVER, + .len = 0x80, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICD_ICACTIVER, + .len = 0x80, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICD_IPRIORITYR, + .len = 0x400, + .bits_per_irq = 8, + .handle_mmio = handle_mmio_priority_reg_dist, + }, + { + /* TARGETSRn is RES0 when ARE=1 */ + .base = GICD_ITARGETSR, + .len = 0x400, + .bits_per_irq = 8, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICD_ICFGR, + .len = 0x100, + .bits_per_irq = 2, + .handle_mmio = handle_mmio_cfg_reg_dist, + }, + { + /* this is RAZ/WI when DS=1 */ + .base = GICD_IGRPMODR, + .len = 0x80, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + /* this is RAZ/WI when DS=1 */ + .base = GICD_NSACR, + .len = 0x100, + .bits_per_irq = 2, + .handle_mmio = handle_mmio_raz_wi, + }, + { + /* this is RAZ/WI when ARE=1 */ + .base = GICD_SGIR, + .len = 0x04, + .handle_mmio = handle_mmio_raz_wi, + }, + { + /* this is RAZ/WI when ARE=1 */ + .base = GICD_CPENDSGIR, + .len = 0x10, + .handle_mmio = handle_mmio_raz_wi, + }, + { + /* this is RAZ/WI when ARE=1 */ + .base = GICD_SPENDSGIR, + .len = 0x10, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICD_IROUTER + 0x100, + .len = 0x1ee0, + .bits_per_irq = 64, + .handle_mmio = handle_mmio_route_reg, + }, + { + .base = GICD_IDREGS, + .len = 0x30, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_idregs, + }, + {}, +}; + +static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + struct kvm_vcpu *redist_vcpu = mmio->private; + + return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, + redist_vcpu->vcpu_id, + ACCESS_WRITE_SETBIT); +} + +static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + struct kvm_vcpu *redist_vcpu = mmio->private; + + return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, + redist_vcpu->vcpu_id, + ACCESS_WRITE_CLEARBIT); +} + +static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + struct kvm_vcpu *redist_vcpu = mmio->private; + + return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, + redist_vcpu->vcpu_id); +} + +static bool handle_mmio_clear_pending_reg_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + struct kvm_vcpu *redist_vcpu = mmio->private; + + return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, + redist_vcpu->vcpu_id); +} + +static bool handle_mmio_priority_reg_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + struct kvm_vcpu *redist_vcpu = mmio->private; + u32 *reg; + + reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, + redist_vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + return false; +} + +static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + struct kvm_vcpu *redist_vcpu = mmio->private; + + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + redist_vcpu->vcpu_id, offset >> 1); + + return vgic_handle_cfg_reg(reg, mmio, offset); +} + +static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = { + { + .base = GICR_IGROUPR0, + .len = 0x04, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_rao_wi, + }, + { + .base = GICR_ISENABLER0, + .len = 0x04, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_set_enable_reg_redist, + }, + { + .base = GICR_ICENABLER0, + .len = 0x04, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_clear_enable_reg_redist, + }, + { + .base = GICR_ISPENDR0, + .len = 0x04, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_set_pending_reg_redist, + }, + { + .base = GICR_ICPENDR0, + .len = 0x04, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_clear_pending_reg_redist, + }, + { + .base = GICR_ISACTIVER0, + .len = 0x04, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICR_ICACTIVER0, + .len = 0x04, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICR_IPRIORITYR0, + .len = 0x20, + .bits_per_irq = 8, + .handle_mmio = handle_mmio_priority_reg_redist, + }, + { + .base = GICR_ICFGR0, + .len = 0x08, + .bits_per_irq = 2, + .handle_mmio = handle_mmio_cfg_reg_redist, + }, + { + .base = GICR_IGRPMODR0, + .len = 0x04, + .bits_per_irq = 1, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICR_NSACR, + .len = 0x04, + .handle_mmio = handle_mmio_raz_wi, + }, + {}, +}; + +static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + /* since we don't support LPIs, this register is zero for now */ + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + u64 mpidr; + struct kvm_vcpu *redist_vcpu = mmio->private; + int target_vcpu_id = redist_vcpu->vcpu_id; + + /* the upper 32 bits contain the affinity value */ + if ((offset & ~3) == 4) { + mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); + reg = compress_mpidr(mpidr); + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = redist_vcpu->vcpu_id << 8; + if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) + reg |= GICR_TYPER_LAST; + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; +} + +static const struct kvm_mmio_range vgic_redist_ranges[] = { + { + .base = GICR_CTLR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_ctlr_redist, + }, + { + .base = GICR_TYPER, + .len = 0x08, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_typer_redist, + }, + { + .base = GICR_IIDR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_iidr, + }, + { + .base = GICR_WAKER, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICR_IDREGS, + .len = 0x30, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_idregs, + }, + {}, +}; + +/* + * This function splits accesses between the distributor and the two + * redistributor parts (private/SPI). As each redistributor is accessible + * from any CPU, we have to determine the affected VCPU by taking the faulting + * address into account. We then pass this VCPU to the handler function via + * the private parameter. + */ +#define SGI_BASE_OFFSET SZ_64K +static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long dbase = dist->vgic_dist_base; + unsigned long rdbase = dist->vgic_redist_base; + int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); + int vcpu_id; + const struct kvm_mmio_range *mmio_range; + + if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { + return vgic_handle_mmio_range(vcpu, run, mmio, + vgic_v3_dist_ranges, dbase); + } + + if (!is_in_range(mmio->phys_addr, mmio->len, rdbase, + GIC_V3_REDIST_SIZE * nrcpus)) + return false; + + vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE; + rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); + mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id); + + if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) { + rdbase += SGI_BASE_OFFSET; + mmio_range = vgic_redist_sgi_ranges; + } else { + mmio_range = vgic_redist_ranges; + } + return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase); +} + +static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) +{ + if (vgic_queue_irq(vcpu, 0, irq)) { + vgic_dist_irq_clear_pending(vcpu, irq); + vgic_cpu_irq_clear(vcpu, irq); + return true; + } + + return false; +} + +static int vgic_v3_map_resources(struct kvm *kvm, + const struct vgic_params *params) +{ + int ret = 0; + struct vgic_dist *dist = &kvm->arch.vgic; + + if (!irqchip_in_kernel(kvm)) + return 0; + + mutex_lock(&kvm->lock); + + if (vgic_ready(kvm)) + goto out; + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) { + kvm_err("Need to set vgic distributor addresses first\n"); + ret = -ENXIO; + goto out; + } + + /* + * For a VGICv3 we require the userland to explicitly initialize + * the VGIC before we need to use it. + */ + if (!vgic_initialized(kvm)) { + ret = -EBUSY; + goto out; + } + + kvm->arch.vgic.ready = true; +out: + if (ret) + kvm_vgic_destroy(kvm); + mutex_unlock(&kvm->lock); + return ret; +} + +static int vgic_v3_init_model(struct kvm *kvm) +{ + int i; + u32 mpidr; + struct vgic_dist *dist = &kvm->arch.vgic; + int nr_spis = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; + + dist->irq_spi_mpidr = kcalloc(nr_spis, sizeof(dist->irq_spi_mpidr[0]), + GFP_KERNEL); + + if (!dist->irq_spi_mpidr) + return -ENOMEM; + + /* Initialize the target VCPUs for each IRQ to VCPU 0 */ + mpidr = compress_mpidr(kvm_vcpu_get_mpidr_aff(kvm_get_vcpu(kvm, 0))); + for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i++) { + dist->irq_spi_cpu[i - VGIC_NR_PRIVATE_IRQS] = 0; + dist->irq_spi_mpidr[i - VGIC_NR_PRIVATE_IRQS] = mpidr; + vgic_bitmap_set_irq_val(dist->irq_spi_target, 0, i, 1); + } + + return 0; +} + +/* GICv3 does not keep track of SGI sources anymore. */ +static void vgic_v3_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) +{ +} + +void vgic_v3_init_emulation(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + dist->vm_ops.handle_mmio = vgic_v3_handle_mmio; + dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; + dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; + dist->vm_ops.init_model = vgic_v3_init_model; + dist->vm_ops.map_resources = vgic_v3_map_resources; + + kvm->arch.max_vcpus = KVM_MAX_VCPUS; +} + +static int vgic_v3_create(struct kvm_device *dev, u32 type) +{ + return kvm_vgic_create(dev->kvm, type); +} + +static void vgic_v3_destroy(struct kvm_device *dev) +{ + kfree(dev); +} + +static int vgic_v3_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_set_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return -ENXIO; + } + + return -ENXIO; +} + +static int vgic_v3_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_get_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return -ENXIO; + } + + return -ENXIO; +} + +static int vgic_v3_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + case KVM_VGIC_V2_ADDR_TYPE_CPU: + return -ENXIO; + } + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return -ENXIO; + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + } + } + return -ENXIO; +} + +struct kvm_device_ops kvm_arm_vgic_v3_ops = { + .name = "kvm-arm-vgic-v3", + .create = vgic_v3_create, + .destroy = vgic_v3_destroy, + .set_attr = vgic_v3_set_attr, + .get_attr = vgic_v3_get_attr, + .has_attr = vgic_v3_has_attr, +}; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b6e17c886ce2..6d23e57c3561 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1249,7 +1249,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, struct kvm_vcpu *vcpu; int edge_triggered, level_triggered; int enabled; - bool ret = true; + bool ret = true, can_inject = true; spin_lock(&dist->lock); @@ -1264,6 +1264,11 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, if (irq_num >= VGIC_NR_PRIVATE_IRQS) { cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; + if (cpuid == VCPU_NOT_ALLOCATED) { + /* Pretend we use CPU0, and prevent injection */ + cpuid = 0; + can_inject = false; + } vcpu = kvm_get_vcpu(kvm, cpuid); } @@ -1286,7 +1291,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, enabled = vgic_irq_is_enabled(vcpu, irq_num); - if (!enabled) { + if (!enabled || !can_inject) { ret = false; goto out; } @@ -1439,6 +1444,7 @@ void kvm_vgic_destroy(struct kvm *kvm) } kfree(dist->irq_sgi_sources); kfree(dist->irq_spi_cpu); + kfree(dist->irq_spi_mpidr); kfree(dist->irq_spi_target); kfree(dist->irq_pending_on_cpu); dist->irq_sgi_sources = NULL; @@ -1594,6 +1600,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index e363b9341873..1e83bdf5f499 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -35,6 +35,8 @@ #define ACCESS_WRITE_VALUE (3 << 1) #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) +#define VCPU_NOT_ALLOCATED ((u8)-1) + unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x); void vgic_update_state(struct kvm *kvm); @@ -116,5 +118,6 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_init(struct kvm *kvm); void vgic_v2_init_emulation(struct kvm *kvm); +void vgic_v3_init_emulation(struct kvm *kvm); #endif From 7e5802781c3e109558ddfd8b02155ad24d872ee7 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 12 Nov 2014 13:46:06 +0000 Subject: [PATCH 30/36] arm64: GICv3: introduce symbolic names for GICv3 ICC_SGI1R_EL1 fields The gic_send_sgi() function used hardcoded bit shift values to generate the ICC_SGI1R_EL1 register value. Replace this with symbolic names to allow reusing them later. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- drivers/irqchip/irq-gic-v3.c | 14 +++++++++----- include/linux/irqchip/arm-gic-v3.h | 12 ++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 1a146ccee701..2ab290bec655 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -481,15 +481,19 @@ out: return tlist; } +#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \ + (MPIDR_AFFINITY_LEVEL(cluster_id, level) \ + << ICC_SGI1R_AFFINITY_## level ##_SHIFT) + static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) { u64 val; - val = (MPIDR_AFFINITY_LEVEL(cluster_id, 3) << 48 | - MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32 | - irq << 24 | - MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16 | - tlist); + val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) | + MPIDR_TO_SGI_AFFINITY(cluster_id, 2) | + irq << ICC_SGI1R_SGI_ID_SHIFT | + MPIDR_TO_SGI_AFFINITY(cluster_id, 1) | + tlist << ICC_SGI1R_TARGET_LIST_SHIFT); pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); gic_write_sgi1r(val); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 3fb4d8588a26..800544bc7bfd 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -280,6 +280,18 @@ #define ICC_SRE_EL2_SRE (1 << 0) #define ICC_SRE_EL2_ENABLE (1 << 3) +#define ICC_SGI1R_TARGET_LIST_SHIFT 0 +#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 +#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_SGI_ID_SHIFT 24 +#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) + /* * System register definitions */ From 6d52f35af10cf24d59b43f3fd8c938ad23cab543 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 3 Jun 2014 10:13:13 +0200 Subject: [PATCH 31/36] arm64: KVM: add SGI generation register emulation While the generation of a (virtual) inter-processor interrupt (SGI) on a GICv2 works by writing to a MMIO register, GICv3 uses the system register ICC_SGI1R_EL1 to trigger them. Add a trap handler function that calls the new SGI register handler in the GICv3 code. As ICC_SRE_EL1.SRE at this point is still always 0, this will not trap yet, but will only be used later when all the data structures have been initialized properly. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 27 +++++++++ include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic-v3-emul.c | 111 ++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 136e6797676b..8c30f265198e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -165,6 +165,27 @@ static bool access_sctlr(struct kvm_vcpu *vcpu, return true; } +/* + * Trap handler for the GICv3 SGI generation system register. + * Forward the request to the VGIC emulation. + * The cp15_64 code makes sure this automatically works + * for both AArch64 and AArch32 accesses. + */ +static bool access_gic_sgi(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 val; + + if (!p->is_write) + return read_from_write_only(vcpu, p); + + val = *vcpu_reg(vcpu, p->Rt); + vgic_v3_dispatch_sgi(vcpu, val); + + return true; +} + static bool trap_raz_wi(struct kvm_vcpu *vcpu, const struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -434,6 +455,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), NULL, reset_val, VBAR_EL1, 0 }, + /* ICC_SGI1R_EL1 */ + { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101), + access_gic_sgi }, /* ICC_SRE_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101), trap_raz_wi }, @@ -666,6 +690,8 @@ static const struct sys_reg_desc cp14_64_regs[] = { * register). */ static const struct sys_reg_desc cp15_regs[] = { + { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, @@ -713,6 +739,7 @@ static const struct sys_reg_desc cp15_regs[] = { static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, }; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 98c30168bce4..b9b2e05a39ef 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -307,6 +307,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level); +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index 8db1db597223..2d2199d85b74 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -841,6 +841,117 @@ void vgic_v3_init_emulation(struct kvm *kvm) kvm->arch.max_vcpus = KVM_MAX_VCPUS; } +/* + * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI + * generation register ICC_SGI1R_EL1) with a given VCPU. + * If the VCPU's MPIDR matches, return the level0 affinity, otherwise + * return -1. + */ +static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) +{ + unsigned long affinity; + int level0; + + /* + * Split the current VCPU's MPIDR into affinity level 0 and the + * rest as this is what we have to compare against. + */ + affinity = kvm_vcpu_get_mpidr_aff(vcpu); + level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); + affinity &= ~MPIDR_LEVEL_MASK; + + /* bail out if the upper three levels don't match */ + if (sgi_aff != affinity) + return -1; + + /* Is this VCPU's bit set in the mask ? */ + if (!(sgi_cpu_mask & BIT(level0))) + return -1; + + return level0; +} + +#define SGI_AFFINITY_LEVEL(reg, level) \ + ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ + >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) + +/** + * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs + * @vcpu: The VCPU requesting a SGI + * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU + * + * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. + * This will trap in sys_regs.c and call this function. + * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the + * target processors as well as a bitmask of 16 Aff0 CPUs. + * If the interrupt routing mode bit is not set, we iterate over all VCPUs to + * check for matching ones. If this bit is set, we signal all, but not the + * calling VCPU. + */ +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *c_vcpu; + struct vgic_dist *dist = &kvm->arch.vgic; + u16 target_cpus; + u64 mpidr; + int sgi, c; + int vcpu_id = vcpu->vcpu_id; + bool broadcast; + int updated = 0; + + sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; + broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); + target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; + mpidr = SGI_AFFINITY_LEVEL(reg, 3); + mpidr |= SGI_AFFINITY_LEVEL(reg, 2); + mpidr |= SGI_AFFINITY_LEVEL(reg, 1); + + /* + * We take the dist lock here, because we come from the sysregs + * code path and not from the MMIO one (which already takes the lock). + */ + spin_lock(&dist->lock); + + /* + * We iterate over all VCPUs to find the MPIDRs matching the request. + * If we have handled one CPU, we clear it's bit to detect early + * if we are already finished. This avoids iterating through all + * VCPUs when most of the times we just signal a single VCPU. + */ + kvm_for_each_vcpu(c, c_vcpu, kvm) { + + /* Exit early if we have dealt with all requested CPUs */ + if (!broadcast && target_cpus == 0) + break; + + /* Don't signal the calling VCPU */ + if (broadcast && c == vcpu_id) + continue; + + if (!broadcast) { + int level0; + + level0 = match_mpidr(mpidr, target_cpus, c_vcpu); + if (level0 == -1) + continue; + + /* remove this matching VCPU from the mask */ + target_cpus &= ~BIT(level0); + } + + /* Flag the SGI as pending */ + vgic_dist_irq_set_pending(c_vcpu, sgi); + updated = 1; + kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); + } + if (updated) + vgic_update_state(vcpu->kvm); + spin_unlock(&dist->lock); + if (updated) + vgic_kick_vcpus(vcpu->kvm); +} + static int vgic_v3_create(struct kvm_device *dev, u32 type) { return kvm_vgic_create(dev->kvm, type); From b5d84ff600a244b655bd4f657f5350f29b0ce611 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 3 Jun 2014 10:26:03 +0200 Subject: [PATCH 32/36] arm/arm64: KVM: enable kernel side of GICv3 emulation With all the necessary GICv3 emulation code in place, we can now connect the code to the GICv3 backend in the kernel. The LR register handling is different depending on the emulated GIC model, so provide different implementations for each. Also allow non-v2-compatible GICv3 implementations (which don't provide MMIO regions for the virtual CPU interface in the DT), but restrict those hosts to support GICv3 guests only. If the device tree provides a GICv2 compatible GICV resource entry, but that one is faulty, just disable the GICv2 emulation and let the user use at least the GICv3 emulation for guests. To provide proper support for the legacy KVM_CREATE_IRQCHIP ioctl, note virtual GICv2 compatibility in struct vgic_params and use it on creating a VGICv2. Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/vgic-v2.c | 1 + virt/kvm/arm/vgic-v3.c | 76 ++++++++++++++++++++++++++++-------------- virt/kvm/arm/vgic.c | 14 ++++++++ 4 files changed, 68 insertions(+), 25 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b9b2e05a39ef..39039d5f09a8 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -134,6 +134,8 @@ struct vgic_params { /* Virtual control interface base address */ void __iomem *vctrl_base; int max_gic_vcpus; + /* Only needed for the legacy KVM_CREATE_IRQCHIP */ + bool can_emulate_gicv2; }; struct vgic_vm_ops { diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index e8b82b289844..a0a7b5d1a070 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -229,6 +229,7 @@ int vgic_v2_probe(struct device_node *vgic_node, goto out_unmap; } + vgic->can_emulate_gicv2 = true; kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); vgic->vcpu_base = vcpu_res.start; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 52490480b6f9..3a62d8a9a2c6 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -34,6 +34,7 @@ #define GICH_LR_VIRTUALID (0x3ffUL << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) +#define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1) /* * LRs are stored in reverse order in memory. make sure we index them @@ -48,12 +49,17 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) struct vgic_lr lr_desc; u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; - lr_desc.irq = val & GICH_LR_VIRTUALID; - if (lr_desc.irq <= 15) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; else - lr_desc.source = 0; - lr_desc.state = 0; + lr_desc.irq = val & GICH_LR_VIRTUALID; + + lr_desc.source = 0; + if (lr_desc.irq <= 15 && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + + lr_desc.state = 0; if (val & ICH_LR_PENDING_BIT) lr_desc.state |= LR_STATE_PENDING; @@ -68,8 +74,20 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc) { - u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | - lr_desc.irq); + u64 lr_val; + + lr_val = lr_desc.irq; + + /* + * Currently all guest IRQs are Group1, as Group0 would result + * in a FIQ in the guest, which it wouldn't expect. + * Eventually we want to make this configurable, so we may revisit + * this in the future. + */ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + lr_val |= ICH_LR_GROUP; + else + lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; if (lr_desc.state & LR_STATE_PENDING) lr_val |= ICH_LR_PENDING_BIT; @@ -154,7 +172,15 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) */ vgic_v3->vgic_vmcr = 0; - vgic_v3->vgic_sre = 0; + /* + * If we are emulating a GICv3, we do it in an non-GICv2-compatible + * way, so we force SRE to 1 to demonstrate this to the guest. + * This goes with the spec allowing the value to be RAO/WI. + */ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; + else + vgic_v3->vgic_sre = 0; /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; @@ -209,34 +235,34 @@ int vgic_v3_probe(struct device_node *vgic_node, * maximum of 16 list registers. Just ignore bit 4... */ vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; + vgic->can_emulate_gicv2 = false; if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) gicv_idx = 1; gicv_idx += 3; /* Also skip GICD, GICC, GICH */ if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { - kvm_err("Cannot obtain GICV region\n"); - ret = -ENXIO; - goto out; - } - - if (!PAGE_ALIGNED(vcpu_res.start)) { - kvm_err("GICV physical address 0x%llx not page aligned\n", + kvm_info("GICv3: no GICV resource entry\n"); + vgic->vcpu_base = 0; + } else if (!PAGE_ALIGNED(vcpu_res.start)) { + pr_warn("GICV physical address 0x%llx not page aligned\n", (unsigned long long)vcpu_res.start); - ret = -ENXIO; - goto out; - } - - if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { - kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + vgic->vcpu_base = 0; + } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", (unsigned long long)resource_size(&vcpu_res), PAGE_SIZE); - ret = -ENXIO; - goto out; + vgic->vcpu_base = 0; + } else { + vgic->vcpu_base = vcpu_res.start; + vgic->can_emulate_gicv2 = true; + kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); } - kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); + if (vgic->vcpu_base == 0) + kvm_info("disabling GICv2 emulation\n"); + kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3); - vgic->vcpu_base = vcpu_res.start; vgic->vctrl_base = NULL; vgic->type = VGIC_V3; vgic->max_gic_vcpus = KVM_MAX_VCPUS; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 6d23e57c3561..2efba8231375 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1550,6 +1550,11 @@ static int init_vgic_model(struct kvm *kvm, int type) case KVM_DEV_TYPE_ARM_VGIC_V2: vgic_v2_init_emulation(kvm); break; +#ifdef CONFIG_ARM_GIC_V3 + case KVM_DEV_TYPE_ARM_VGIC_V3: + vgic_v3_init_emulation(kvm); + break; +#endif default: return -ENODEV; } @@ -1572,6 +1577,15 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) goto out; } + /* + * This function is also called by the KVM_CREATE_IRQCHIP handler, + * which had no chance yet to check the availability of the GICv2 + * emulation. So check this here again. KVM_CREATE_DEVICE does + * the proper checks already. + */ + if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) + return -ENODEV; + /* * Any time a vcpu is run, vcpu_load is called which tries to grab the * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure From ac3d373564d9744068d867a0eb16da2ff8d5ee9d Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 3 Jun 2014 10:26:30 +0200 Subject: [PATCH 33/36] arm/arm64: KVM: allow userland to request a virtual GICv3 With all of the GICv3 code in place now we allow userland to ask the kernel for using a virtual GICv3 in the guest. Also we provide the necessary support for guests setting the memory addresses for the virtual distributor and redistributors. This requires some userland code to make use of that feature and explicitly ask for a virtual GICv3. Document that KVM_CREATE_IRQCHIP only works for GICv2, but is considered legacy and using KVM_CREATE_DEVICE is preferred. Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 13 ++++-- .../virtual/kvm/devices/arm-vgic.txt | 22 ++++++++- arch/arm64/include/uapi/asm/kvm.h | 7 +++ include/kvm/arm_vgic.h | 4 +- virt/kvm/arm/vgic-v3-emul.c | 3 ++ virt/kvm/arm/vgic.c | 46 +++++++++++++------ 6 files changed, 73 insertions(+), 22 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0007fef4ed81..f4b19d78782b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -612,11 +612,14 @@ Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error -Creates an interrupt controller model in the kernel. On x86, creates a virtual -ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a -local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ARM/arm64, a GIC is -created. On s390, a dummy irq routing table is created. +Creates an interrupt controller model in the kernel. +On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up +future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both +PIC and IOAPIC; GSI 16-23 only go to the IOAPIC. +On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of +KVM_CREATE_DEVICE, which also supports creating a GICv2. Using +KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2. +On s390, a dummy irq routing table is created. Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled before KVM_CREATE_IRQCHIP can be used. diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 30f5427cf9b0..5d4fd4b944cd 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -3,22 +3,38 @@ ARM Virtual Generic Interrupt Controller (VGIC) Device types supported: KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0 + KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0 Only one VGIC instance may be instantiated through either this API or the legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt controller, requiring emulated user-space devices to inject interrupts to the VGIC instead of directly to CPUs. +Creating a guest GICv3 device requires a host GICv3 as well. +GICv3 implementations with hardware compatibility support allow a guest GICv2 +as well. + Groups: KVM_DEV_ARM_VGIC_GRP_ADDR Attributes: KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) Base address in the guest physical address space of the GIC distributor - register mappings. + register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) Base address in the guest physical address space of the GIC virtual cpu - interface register mappings. + interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. + + KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit) + Base address in the guest physical address space of the GICv3 distributor + register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. + + KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit) + Base address in the guest physical address space of the GICv3 + redistributor register mappings. There are two 64K pages for each + VCPU and all of the redistributor pages are contiguous. + Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. + KVM_DEV_ARM_VGIC_GRP_DIST_REGS Attributes: @@ -36,6 +52,7 @@ Groups: the register. Limitations: - Priorities are not implemented, and registers are RAZ/WI + - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. Errors: -ENODEV: Getting or setting this register is not yet supported -EBUSY: One or more VCPUs are running @@ -68,6 +85,7 @@ Groups: Limitations: - Priorities are not implemented, and registers are RAZ/WI + - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. Errors: -ENODEV: Getting or setting this register is not yet supported -EBUSY: One or more VCPUs are running diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 480af3461068..3ef77a466018 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -78,6 +78,13 @@ struct kvm_regs { #define KVM_VGIC_V2_DIST_SIZE 0x1000 #define KVM_VGIC_V2_CPU_SIZE 0x2000 +/* Supported VGICv3 address types */ +#define KVM_VGIC_V3_ADDR_TYPE_DIST 2 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 + +#define KVM_VGIC_V3_DIST_SIZE SZ_64K +#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) + #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 39039d5f09a8..7c55dd5dd2c9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -36,8 +36,8 @@ #define VGIC_V2_MAX_CPUS 8 /* Sanity checks... */ -#if (KVM_MAX_VCPUS > 8) -#error Invalid number of CPU interfaces +#if (KVM_MAX_VCPUS > 255) +#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now #endif #if (VGIC_NR_IRQS_LEGACY & 31) diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index 2d2199d85b74..b3f154631515 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -1007,6 +1007,9 @@ static int vgic_v3_has_attr(struct kvm_device *dev, case KVM_VGIC_V2_ADDR_TYPE_DIST: case KVM_VGIC_V2_ADDR_TYPE_CPU: return -ENXIO; + case KVM_VGIC_V3_ADDR_TYPE_DIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST: + return 0; } break; case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 2efba8231375..184c6dbd5165 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1667,7 +1667,7 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, /** * kvm_vgic_addr - set or get vgic VM base addresses * @kvm: pointer to the vm struct - * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX + * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX * @addr: pointer to address value * @write: if true set the address in the VM address space, if false read the * address @@ -1681,29 +1681,49 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) { int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; + int type_needed; + phys_addr_t *addr_ptr, block_size; mutex_lock(&kvm->lock); switch (type) { case KVM_VGIC_V2_ADDR_TYPE_DIST: - if (write) { - r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, - *addr, KVM_VGIC_V2_DIST_SIZE); - } else { - *addr = vgic->vgic_dist_base; - } + type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; + addr_ptr = &vgic->vgic_dist_base; + block_size = KVM_VGIC_V2_DIST_SIZE; break; case KVM_VGIC_V2_ADDR_TYPE_CPU: - if (write) { - r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, - *addr, KVM_VGIC_V2_CPU_SIZE); - } else { - *addr = vgic->vgic_cpu_base; - } + type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; + addr_ptr = &vgic->vgic_cpu_base; + block_size = KVM_VGIC_V2_CPU_SIZE; break; +#ifdef CONFIG_ARM_GIC_V3 + case KVM_VGIC_V3_ADDR_TYPE_DIST: + type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; + addr_ptr = &vgic->vgic_dist_base; + block_size = KVM_VGIC_V3_DIST_SIZE; + break; + case KVM_VGIC_V3_ADDR_TYPE_REDIST: + type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; + addr_ptr = &vgic->vgic_redist_base; + block_size = KVM_VGIC_V3_REDIST_SIZE; + break; +#endif default: r = -ENODEV; + goto out; } + if (vgic->vgic_model != type_needed) { + r = -ENODEV; + goto out; + } + + if (write) + r = vgic_ioaddr_assign(kvm, addr_ptr, *addr, block_size); + else + *addr = *addr_ptr; + +out: mutex_unlock(&kvm->lock); return r; } From 4fa96afd94a9bb29135d6e2e8fdb527e58f4fd5c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 13 Jan 2015 12:02:13 +0000 Subject: [PATCH 34/36] arm/arm64: KVM: force alignment of VGIC dist/CPU/redist addresses Although the GIC architecture requires us to map the MMIO regions only at page aligned addresses, we currently do not enforce this from the kernel side. Restrict any vGICv2 regions to be 4K aligned and any GICv3 regions to be 64K aligned. Document this requirement. Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/devices/arm-vgic.txt | 4 ++++ virt/kvm/arm/vgic.c | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 5d4fd4b944cd..3fb905429e8a 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -20,20 +20,24 @@ Groups: KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) Base address in the guest physical address space of the GIC distributor register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. + This address needs to be 4K aligned and the region covers 4 KByte. KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) Base address in the guest physical address space of the GIC virtual cpu interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. + This address needs to be 4K aligned and the region covers 4 KByte. KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit) Base address in the guest physical address space of the GICv3 distributor register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. + This address needs to be 64K aligned and the region covers 64 KByte. KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit) Base address in the guest physical address space of the GICv3 redistributor register mappings. There are two 64K pages for each VCPU and all of the redistributor pages are contiguous. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. + This address needs to be 64K aligned. KVM_DEV_ARM_VGIC_GRP_DIST_REGS diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 184c6dbd5165..0cc6ab6005a0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1683,6 +1683,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) struct vgic_dist *vgic = &kvm->arch.vgic; int type_needed; phys_addr_t *addr_ptr, block_size; + phys_addr_t alignment; mutex_lock(&kvm->lock); switch (type) { @@ -1690,22 +1691,26 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; addr_ptr = &vgic->vgic_dist_base; block_size = KVM_VGIC_V2_DIST_SIZE; + alignment = SZ_4K; break; case KVM_VGIC_V2_ADDR_TYPE_CPU: type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; addr_ptr = &vgic->vgic_cpu_base; block_size = KVM_VGIC_V2_CPU_SIZE; + alignment = SZ_4K; break; #ifdef CONFIG_ARM_GIC_V3 case KVM_VGIC_V3_ADDR_TYPE_DIST: type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; addr_ptr = &vgic->vgic_dist_base; block_size = KVM_VGIC_V3_DIST_SIZE; + alignment = SZ_64K; break; case KVM_VGIC_V3_ADDR_TYPE_REDIST: type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; addr_ptr = &vgic->vgic_redist_base; block_size = KVM_VGIC_V3_REDIST_SIZE; + alignment = SZ_64K; break; #endif default: @@ -1718,10 +1723,15 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) goto out; } - if (write) - r = vgic_ioaddr_assign(kvm, addr_ptr, *addr, block_size); - else + if (write) { + if (!IS_ALIGNED(*addr, alignment)) + r = -EINVAL; + else + r = vgic_ioaddr_assign(kvm, addr_ptr, *addr, + block_size); + } else { *addr = *addr_ptr; + } out: mutex_unlock(&kvm->lock); From 227ea818f26ec0d881853bdbae7684a32b1391ba Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 23 Jan 2015 10:49:31 +0100 Subject: [PATCH 35/36] arm/arm64: KVM: Fixup incorrect config symbol in comment A comment in the dirty page logging patch series mentioned incorrectly spelled config symbols, just fix them up to match the real thing. Reported-by: Paul Bolle Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index eb945973b3be..74aeabaa3c4d 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1042,8 +1042,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) /* * Release kvm_mmu_lock periodically if the memory region is * large. Otherwise, we may see kernel panics with - * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCK_DETECTOR, - * CONFIG_LOCK_DEP. Additionally, holding the lock too long + * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR, + * CONFIG_LOCKDEP. Additionally, holding the lock too long * will also starve other vCPUs. */ if (need_resched() || spin_needbreak(&kvm->mmu_lock)) From 4b990589952f0e30aa860184ac6c76219a74632e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 23 Jan 2015 10:50:23 +0100 Subject: [PATCH 36/36] KVM: Remove unused config symbol The dirty patch logging series introduced both HAVE_KVM_ARCH_DIRTY_LOG_PROTECT and KVM_GENERIC_DIRTYLOG_READ_PROTECT config symbols, but only KVM_GENERIC_DIRTYLOG_READ_PROTECT is used. Just remove the unused one. (The config symbol was renamed during the development of the patch series and the old name just creeped in by accident.() Reported-by: Paul Bolle Signed-off-by: Christoffer Dall --- virt/kvm/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 314950c51d9f..50d110654b42 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -41,8 +41,5 @@ config KVM_VFIO config HAVE_KVM_ARCH_TLB_FLUSH_ALL bool -config HAVE_KVM_ARCH_DIRTY_LOG_PROTECT - bool - config KVM_GENERIC_DIRTYLOG_READ_PROTECT bool