* s390: Support for runtime instrumentation within guests,

support of 248 VCPUs.
 
 * ARM: rewrite of the arm64 world switch in C, support for
 16-bit VM identifiers.  Performance counter virtualization
 missed the boat.
 
 * x86: Support for more Hyper-V features (synthetic interrupt
 controller), MMU cleanups
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQEcBAABAgAGBQJWlSKwAAoJEL/70l94x66DY0UIAK5vp4zfQoQOJC4KP4Xgxwdu
 kpnK2Boz3/74o1b0y5+eJZoUZCsXCVLtmP5uhmMxUYWDgByFG2X8ZDhPFwB5FYLT
 2dN+Lr4tsolgIfRdHZtrT6Svp9SDL039bWTdscnbR6l37/j9FRWvpKdhI3orloFD
 /i4CSW2dVIq1/9Xctwu/rtcOEesEx4Cad+6YV3/530eVAXFzE908nXfmqJNZTocY
 YCGcmrMVCOu0ng5QM4xSzmmYjKMLUcRs+QzZWkVBzdJtTgwZUr09yj7I2dZ1yj/i
 cxYrJy6shSwE74XkXsmvG+au3C5u3vX4tnXjBFErnPJ99oqzHatVnFWNRhj4dLQ=
 =PIj1
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "PPC changes will come next week.

   - s390: Support for runtime instrumentation within guests, support of
     248 VCPUs.

   - ARM: rewrite of the arm64 world switch in C, support for 16-bit VM
     identifiers.  Performance counter virtualization missed the boat.

   - x86: Support for more Hyper-V features (synthetic interrupt
     controller), MMU cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (115 commits)
  kvm: x86: Fix vmwrite to SECONDARY_VM_EXEC_CONTROL
  kvm/x86: Hyper-V SynIC timers tracepoints
  kvm/x86: Hyper-V SynIC tracepoints
  kvm/x86: Update SynIC timers on guest entry only
  kvm/x86: Skip SynIC vector check for QEMU side
  kvm/x86: Hyper-V fix SynIC timer disabling condition
  kvm/x86: Reorg stimer_expiration() to better control timer restart
  kvm/x86: Hyper-V unify stimer_start() and stimer_restart()
  kvm/x86: Drop stimer_stop() function
  kvm/x86: Hyper-V timers fix incorrect logical operation
  KVM: move architecture-dependent requests to arch/
  KVM: renumber vcpu->request bits
  KVM: document which architecture uses each request bit
  KVM: Remove unused KVM_REQ_KICK to save a bit in vcpu->requests
  kvm: x86: Check kvm_write_guest return value in kvm_write_wall_clock
  KVM: s390: implement the RI support of guest
  kvm/s390: drop unpaired smp_mb
  kvm: x86: fix comment about {mmu,nested_mmu}.gva_to_gpa
  KVM: x86: MMU: Use clear_page() instead of init_shadow_page_table()
  arm/arm64: KVM: Detect vGIC presence at runtime
  ...
This commit is contained in:
Linus Torvalds 2016-01-12 13:22:12 -08:00
commit 1baa5efbeb
87 changed files with 4043 additions and 2393 deletions

View File

@ -1451,6 +1451,7 @@ struct kvm_irq_routing_entry {
struct kvm_irq_routing_irqchip irqchip;
struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint;
__u32 pad[8];
} u;
};
@ -1459,6 +1460,7 @@ struct kvm_irq_routing_entry {
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
No flags are specified so far, the corresponding field must be set to zero.
@ -1482,6 +1484,10 @@ struct kvm_irq_routing_s390_adapter {
__u32 adapter_id;
};
struct kvm_irq_routing_hv_sint {
__u32 vcpu;
__u32 sint;
};
4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated)
@ -3331,6 +3337,28 @@ the userspace IOAPIC should process the EOI and retrigger the interrupt if
it is still asserted. Vector is the LAPIC interrupt vector for which the
EOI was received.
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
__u32 type;
union {
struct {
__u32 msr;
__u64 control;
__u64 evt_page;
__u64 msg_page;
} synic;
} u;
};
/* KVM_EXIT_HYPERV */
struct kvm_hyperv_exit hyperv;
Indicates that the VCPU exits into userspace to process some tasks
related to Hyper-V emulation.
Valid values for 'type' are:
KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
Hyper-V SynIC state change. Notification is used to remap SynIC
event/message pages and to enable/disable SynIC messages/events processing
in userspace.
/* Fix the size of the union. */
char padding[256];
};
@ -3685,3 +3713,16 @@ available, means that that the kernel has an implementation of the
H_RANDOM hypercall backed by a hardware random-number generator.
If present, the kernel H_RANDOM handler can be enabled for guest use
with the KVM_CAP_PPC_ENABLE_HCALL capability.
8.2 KVM_CAP_HYPERV_SYNIC
Architectures: x86
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that that the kernel has an implementation of the
Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
used to support Windows Hyper-V based guest paravirt drivers(VMBus).
In order to use SynIC, it has to be activated by setting this
capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
will disable the use of APIC hardware virtualization even if supported
by the CPU, as it's incompatible with SynIC auto-EOI behavior.

View File

@ -37,7 +37,8 @@ Returns: -EFAULT if the given address is not accessible
Allows userspace to query the actual limit and set a new limit for
the maximum guest memory size. The limit will be rounded up to
2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by
the number of page table levels.
the number of page table levels. In the case that there is no limit we will set
the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
2. GROUP: KVM_S390_VM_CPU_MODEL
Architectures: s390

View File

@ -203,10 +203,10 @@ Shadow pages contain the following information:
page cannot be destroyed. See role.invalid.
parent_ptes:
The reverse mapping for the pte/ptes pointing at this page's spt. If
parent_ptes bit 0 is zero, only one spte points at this pages and
parent_ptes bit 0 is zero, only one spte points at this page and
parent_ptes points at this single spte, otherwise, there exists multiple
sptes pointing at this page and (parent_ptes & ~0x1) points at a data
structure with a list of parent_ptes.
structure with a list of parent sptes.
unsync:
If true, then the translations in this page may not match the guest's
translation. This is equivalent to the state of the tlb when a pte is

View File

@ -6089,6 +6089,7 @@ M: Marc Zyngier <marc.zyngier@arm.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.cs.columbia.edu
W: http://systems.cs.columbia.edu/projects/kvm-arm
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
S: Supported
F: arch/arm/include/uapi/asm/kvm*
F: arch/arm/include/asm/kvm*

View File

@ -19,6 +19,7 @@
#ifndef __ARM_KVM_ARM_H__
#define __ARM_KVM_ARM_H__
#include <linux/const.h>
#include <linux/types.h>
/* Hyp Configuration Register (HCR) bits */
@ -132,10 +133,9 @@
* space.
*/
#define KVM_PHYS_SHIFT (40)
#define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL)
#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30))
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL))
#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
/* Virtualization Translation Control Register (VTCR) bits */
#define VTCR_SH0 (3 << 12)
@ -162,17 +162,17 @@
#define VTTBR_X (5 - KVM_T0SZ)
#endif
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT (48LLU)
#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT _AC(48, ULL)
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
/* Hyp Syndrome Register (HSR) bits */
#define HSR_EC_SHIFT (26)
#define HSR_EC (0x3fU << HSR_EC_SHIFT)
#define HSR_IL (1U << 25)
#define HSR_EC (_AC(0x3f, UL) << HSR_EC_SHIFT)
#define HSR_IL (_AC(1, UL) << 25)
#define HSR_ISS (HSR_IL - 1)
#define HSR_ISV_SHIFT (24)
#define HSR_ISV (1U << HSR_ISV_SHIFT)
#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT)
#define HSR_SRT_SHIFT (16)
#define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT)
#define HSR_FSC (0x3f)
@ -180,9 +180,9 @@
#define HSR_SSE (1 << 21)
#define HSR_WNR (1 << 6)
#define HSR_CV_SHIFT (24)
#define HSR_CV (1U << HSR_CV_SHIFT)
#define HSR_CV (_AC(1, UL) << HSR_CV_SHIFT)
#define HSR_COND_SHIFT (20)
#define HSR_COND (0xfU << HSR_COND_SHIFT)
#define HSR_COND (_AC(0xf, UL) << HSR_COND_SHIFT)
#define FSC_FAULT (0x04)
#define FSC_ACCESS (0x08)
@ -210,13 +210,13 @@
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)
#define HSR_WFI_IS_WFE (1U << 0)
#define HSR_WFI_IS_WFE (_AC(1, UL) << 0)
#define HSR_HVC_IMM_MASK ((1UL << 16) - 1)
#define HSR_HVC_IMM_MASK ((_AC(1, UL) << 16) - 1)
#define HSR_DABT_S1PTW (1U << 7)
#define HSR_DABT_CM (1U << 8)
#define HSR_DABT_EA (1U << 9)
#define HSR_DABT_S1PTW (_AC(1, UL) << 7)
#define HSR_DABT_CM (_AC(1, UL) << 8)
#define HSR_DABT_EA (_AC(1, UL) << 9)
#define kvm_arm_exception_type \
{0, "RESET" }, \

View File

@ -150,6 +150,12 @@ struct kvm_vcpu_stat {
u32 halt_successful_poll;
u32 halt_attempted_poll;
u32 halt_wakeup;
u32 hvc_exit_stat;
u64 wfe_exit_stat;
u64 wfi_exit_stat;
u64 mmio_exit_user;
u64 mmio_exit_kernel;
u64 exits;
};
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);

View File

@ -279,6 +279,11 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
pgd_t *merged_hyp_pgd,
unsigned long hyp_idmap_start) { }
static inline unsigned int kvm_get_vmid_bits(void)
{
return 8;
}
#endif /* !__ASSEMBLY__ */
#endif /* __ARM_KVM_MMU_H__ */

View File

@ -44,6 +44,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_psci.h>
#include <asm/sections.h>
#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
@ -58,9 +59,12 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u8 kvm_next_vmid;
static u32 kvm_next_vmid;
static unsigned int kvm_vmid_bits __read_mostly;
static DEFINE_SPINLOCK(kvm_vmid_lock);
static bool vgic_present;
static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
BUG_ON(preemptible());
@ -132,7 +136,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.vmid_gen = 0;
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
kvm->arch.max_vcpus = vgic_present ?
kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
return ret;
out_free_stage2_pgd:
@ -172,6 +177,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
break;
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
@ -433,11 +440,12 @@ static void update_vttbr(struct kvm *kvm)
kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
kvm->arch.vmid = kvm_next_vmid;
kvm_next_vmid++;
kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
kvm->arch.vttbr = pgd_phys | vmid;
spin_unlock(&kvm_vmid_lock);
@ -603,6 +611,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
/*
* Back from guest
*************************************************************/
@ -913,6 +922,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
switch (dev_id) {
case KVM_ARM_DEVICE_VGIC_V2:
if (!vgic_present)
return -ENXIO;
return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
default:
return -ENODEV;
@ -927,6 +938,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
if (!vgic_present)
return -ENXIO;
return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
}
case KVM_ARM_SET_DEVICE_ADDR: {
@ -1067,6 +1080,12 @@ static int init_hyp_mode(void)
goto out_free_mappings;
}
err = create_hyp_mappings(__start_rodata, __end_rodata);
if (err) {
kvm_err("Cannot map rodata section\n");
goto out_free_mappings;
}
/*
* Map the Hyp stack pages
*/
@ -1111,8 +1130,17 @@ static int init_hyp_mode(void)
* Init HYP view of VGIC
*/
err = kvm_vgic_hyp_init();
if (err)
switch (err) {
case 0:
vgic_present = true;
break;
case -ENODEV:
case -ENXIO:
vgic_present = false;
break;
default:
goto out_free_context;
}
/*
* Init HYP architected timer support
@ -1127,6 +1155,10 @@ static int init_hyp_mode(void)
kvm_perf_init();
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
kvm_info("Hyp mode initialized successfully\n");
return 0;

View File

@ -275,6 +275,40 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
return vbar;
}
/*
* Switch to an exception mode, updating both CPSR and SPSR. Follow
* the logic described in AArch32.EnterMode() from the ARMv8 ARM.
*/
static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode;
switch (mode) {
case FIQ_MODE:
*vcpu_cpsr(vcpu) |= PSR_F_BIT;
/* Fall through */
case ABT_MODE:
case IRQ_MODE:
*vcpu_cpsr(vcpu) |= PSR_A_BIT;
/* Fall through */
default:
*vcpu_cpsr(vcpu) |= PSR_I_BIT;
}
*vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
if (sctlr & SCTLR_TE)
*vcpu_cpsr(vcpu) |= PSR_T_BIT;
if (sctlr & SCTLR_EE)
*vcpu_cpsr(vcpu) |= PSR_E_BIT;
/* Note: These now point to the mode banked copies */
*vcpu_spsr(vcpu) = cpsr;
}
/**
* kvm_inject_undefined - inject an undefined exception into the guest
* @vcpu: The VCPU to receive the undefined exception
@ -286,29 +320,13 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
*/
void kvm_inject_undefined(struct kvm_vcpu *vcpu)
{
unsigned long new_lr_value;
unsigned long new_spsr_value;
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
bool is_thumb = (cpsr & PSR_T_BIT);
u32 vect_offset = 4;
u32 return_offset = (is_thumb) ? 2 : 4;
new_spsr_value = cpsr;
new_lr_value = *vcpu_pc(vcpu) - return_offset;
*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE;
*vcpu_cpsr(vcpu) |= PSR_I_BIT;
*vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
if (sctlr & SCTLR_TE)
*vcpu_cpsr(vcpu) |= PSR_T_BIT;
if (sctlr & SCTLR_EE)
*vcpu_cpsr(vcpu) |= PSR_E_BIT;
/* Note: These now point to UND banked copies */
*vcpu_spsr(vcpu) = cpsr;
*vcpu_reg(vcpu, 14) = new_lr_value;
kvm_update_psr(vcpu, UND_MODE);
*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset;
/* Branch to exception vector */
*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
@ -320,30 +338,14 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
*/
static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
{
unsigned long new_lr_value;
unsigned long new_spsr_value;
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
bool is_thumb = (cpsr & PSR_T_BIT);
u32 vect_offset;
u32 return_offset = (is_thumb) ? 4 : 0;
bool is_lpae;
new_spsr_value = cpsr;
new_lr_value = *vcpu_pc(vcpu) + return_offset;
*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | ABT_MODE;
*vcpu_cpsr(vcpu) |= PSR_I_BIT | PSR_A_BIT;
*vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
if (sctlr & SCTLR_TE)
*vcpu_cpsr(vcpu) |= PSR_T_BIT;
if (sctlr & SCTLR_EE)
*vcpu_cpsr(vcpu) |= PSR_E_BIT;
/* Note: These now point to ABT banked copies */
*vcpu_spsr(vcpu) = cpsr;
*vcpu_reg(vcpu, 14) = new_lr_value;
kvm_update_psr(vcpu, ABT_MODE);
*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
if (is_pabt)
vect_offset = 12;

View File

@ -33,6 +33,12 @@
#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
struct kvm_stats_debugfs_item debugfs_entries[] = {
VCPU_STAT(hvc_exit_stat),
VCPU_STAT(wfe_exit_stat),
VCPU_STAT(wfi_exit_stat),
VCPU_STAT(mmio_exit_user),
VCPU_STAT(mmio_exit_kernel),
VCPU_STAT(exits),
{ NULL }
};

View File

@ -42,6 +42,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
ret = kvm_psci_call(vcpu);
if (ret < 0) {
@ -89,9 +90,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
trace_kvm_wfx(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
kvm_vcpu_on_spin(vcpu);
} else {
trace_kvm_wfx(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
}

View File

@ -210,8 +210,11 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (!ret) {
/* We handled the access successfully in the kernel. */
vcpu->stat.mmio_exit_kernel++;
kvm_handle_mmio_return(vcpu, run);
return 1;
} else {
vcpu->stat.mmio_exit_user++;
}
run->exit_reason = KVM_EXIT_MMIO;

View File

@ -656,9 +656,9 @@ static void *kvm_alloc_hwpgd(void)
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
*
* Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
* support either full 40-bit input addresses or limited to 32-bit input
* addresses). Clears the allocated pages.
* Allocates only the stage-2 HW PGD level table(s) (can support either full
* 40-bit input addresses or limited to 32-bit input addresses). Clears the
* allocated pages.
*
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.

View File

@ -125,6 +125,7 @@
#define VTCR_EL2_SL0_LVL1 (1 << 6)
#define VTCR_EL2_T0SZ_MASK 0x3f
#define VTCR_EL2_T0SZ_40B 24
#define VTCR_EL2_VS 19
/*
* We configure the Stage-2 page tables to always restrict the IPA space to be
@ -169,7 +170,7 @@
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT)
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
/* Hyp System Trap Register */
#define HSTR_EL2_T(x) (1 << x)

View File

@ -20,82 +20,6 @@
#include <asm/virt.h>
/*
* 0 is reserved as an invalid value.
* Order *must* be kept in sync with the hyp switch code.
*/
#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */
#define CSSELR_EL1 2 /* Cache Size Selection Register */
#define SCTLR_EL1 3 /* System Control Register */
#define ACTLR_EL1 4 /* Auxiliary Control Register */
#define CPACR_EL1 5 /* Coprocessor Access Control */
#define TTBR0_EL1 6 /* Translation Table Base Register 0 */
#define TTBR1_EL1 7 /* Translation Table Base Register 1 */
#define TCR_EL1 8 /* Translation Control Register */
#define ESR_EL1 9 /* Exception Syndrome Register */
#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */
#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */
#define FAR_EL1 12 /* Fault Address Register */
#define MAIR_EL1 13 /* Memory Attribute Indirection Register */
#define VBAR_EL1 14 /* Vector Base Address Register */
#define CONTEXTIDR_EL1 15 /* Context ID Register */
#define TPIDR_EL0 16 /* Thread ID, User R/W */
#define TPIDRRO_EL0 17 /* Thread ID, User R/O */
#define TPIDR_EL1 18 /* Thread ID, Privileged */
#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
#define PAR_EL1 21 /* Physical Address Register */
#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */
/* 32bit specific registers. Keep them at the end of the range */
#define DACR32_EL2 24 /* Domain Access Control Register */
#define IFSR32_EL2 25 /* Instruction Fault Status Register */
#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */
#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */
#define NR_SYS_REGS 28
/* 32bit mapping */
#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */
#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */
#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */
#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */
#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */
#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */
#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */
#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */
#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */
#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */
#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */
#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */
#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */
#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */
#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */
#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */
#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */
#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */
#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */
#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */
#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
#define cp14_DBGDSCRext (MDSCR_EL1 * 2)
#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2)
#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2)
#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1)
#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2)
#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2)
#define cp14_DBGDCCINT (MDCCINT_EL1 * 2)
#define NR_COPRO_REGS (NR_SYS_REGS * 2)
#define ARM_EXCEPTION_IRQ 0
#define ARM_EXCEPTION_TRAP 1

View File

@ -26,7 +26,6 @@
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>

View File

@ -25,7 +25,6 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@ -85,6 +84,86 @@ struct kvm_vcpu_fault_info {
u64 hpfar_el2; /* Hyp IPA Fault Address Register */
};
/*
* 0 is reserved as an invalid value.
* Order should be kept in sync with the save/restore code.
*/
enum vcpu_sysreg {
__INVALID_SYSREG__,
MPIDR_EL1, /* MultiProcessor Affinity Register */
CSSELR_EL1, /* Cache Size Selection Register */
SCTLR_EL1, /* System Control Register */
ACTLR_EL1, /* Auxiliary Control Register */
CPACR_EL1, /* Coprocessor Access Control */
TTBR0_EL1, /* Translation Table Base Register 0 */
TTBR1_EL1, /* Translation Table Base Register 1 */
TCR_EL1, /* Translation Control Register */
ESR_EL1, /* Exception Syndrome Register */
AFSR0_EL1, /* Auxilary Fault Status Register 0 */
AFSR1_EL1, /* Auxilary Fault Status Register 1 */
FAR_EL1, /* Fault Address Register */
MAIR_EL1, /* Memory Attribute Indirection Register */
VBAR_EL1, /* Vector Base Address Register */
CONTEXTIDR_EL1, /* Context ID Register */
TPIDR_EL0, /* Thread ID, User R/W */
TPIDRRO_EL0, /* Thread ID, User R/O */
TPIDR_EL1, /* Thread ID, Privileged */
AMAIR_EL1, /* Aux Memory Attribute Indirection Register */
CNTKCTL_EL1, /* Timer Control Register (EL1) */
PAR_EL1, /* Physical Address Register */
MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
FPEXC32_EL2, /* Floating-Point Exception Control Register */
DBGVCR32_EL2, /* Debug Vector Catch Register */
NR_SYS_REGS /* Nothing after this line! */
};
/* 32bit mapping */
#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */
#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */
#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */
#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */
#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */
#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */
#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */
#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */
#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */
#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */
#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */
#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */
#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */
#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */
#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */
#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */
#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */
#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */
#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */
#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */
#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
#define cp14_DBGDSCRext (MDSCR_EL1 * 2)
#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2)
#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2)
#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1)
#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2)
#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2)
#define cp14_DBGDCCINT (MDCCINT_EL1 * 2)
#define NR_COPRO_REGS (NR_SYS_REGS * 2)
struct kvm_cpu_context {
struct kvm_regs gp_regs;
union {
@ -197,6 +276,12 @@ struct kvm_vcpu_stat {
u32 halt_successful_poll;
u32 halt_attempted_poll;
u32 halt_wakeup;
u32 hvc_exit_stat;
u64 wfe_exit_stat;
u64 wfi_exit_stat;
u64 mmio_exit_user;
u64 mmio_exit_kernel;
u64 exits;
};
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);

View File

@ -19,7 +19,6 @@
#define __ARM64_KVM_MMIO_H__
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
/*

View File

@ -20,6 +20,7 @@
#include <asm/page.h>
#include <asm/memory.h>
#include <asm/cpufeature.h>
/*
* As we only have the TTBR0_EL2 register, we cannot express
@ -158,7 +159,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT)
#endif
#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
@ -302,5 +302,12 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
}
static inline unsigned int kvm_get_vmid_bits(void)
{
int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1);
return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */

View File

@ -20,6 +20,8 @@
#ifndef __ASM_SYSREG_H
#define __ASM_SYSREG_H
#include <linux/stringify.h>
#include <asm/opcodes.h>
/*
@ -208,6 +210,8 @@
#else
#include <linux/types.h>
asm(
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
" .equ __reg_num_x\\num, \\num\n"
@ -232,6 +236,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set)
val |= set;
asm volatile("msr sctlr_el1, %0" : : "r" (val));
}
/*
* Unlike read_cpuid, calls to read_sysreg are never expected to be
* optimized away or replaced with synthetic values.
*/
#define read_sysreg(r) ({ \
u64 __val; \
asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
__val; \
})
#define write_sysreg(v, r) do { \
u64 __val = (u64)v; \
asm volatile("msr " __stringify(r) ", %0" \
: : "r" (__val)); \
} while (0)
#endif
#endif /* __ASM_SYSREG_H */

View File

@ -109,49 +109,11 @@ int main(void)
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1));
DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1));
DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr));
DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs));
DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr));
DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr));
DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr));
DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr));
DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2));
DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));

View File

@ -10,6 +10,7 @@ KVM=../../../virt/kvm
ARM=../../../arch/arm/kvm
obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp/
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
@ -22,8 +23,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generi
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o

View File

@ -28,13 +28,21 @@
#include <asm/cputype.h>
#include <asm/uaccess.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include "trace.h"
#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
struct kvm_stats_debugfs_item debugfs_entries[] = {
VCPU_STAT(hvc_exit_stat),
VCPU_STAT(wfe_exit_stat),
VCPU_STAT(wfi_exit_stat),
VCPU_STAT(mmio_exit_user),
VCPU_STAT(mmio_exit_kernel),
VCPU_STAT(exits),
{ NULL }
};

View File

@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include <asm/esr.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
@ -39,6 +40,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
ret = kvm_psci_call(vcpu);
if (ret < 0) {
@ -71,9 +73,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
kvm_vcpu_on_spin(vcpu);
} else {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
}

View File

@ -94,6 +94,15 @@ __do_hyp_init:
*/
mrs x5, ID_AA64MMFR0_EL1
bfi x4, x5, #16, #3
/*
* Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
* VTCR_EL2.
*/
mrs x5, ID_AA64MMFR1_EL1
ubfx x5, x5, #5, #1
lsl x5, x5, #VTCR_EL2_VS
orr x4, x4, x5
msr vtcr_el2, x4
mrs x4, mair_el1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,14 @@
#
# Makefile for Kernel-based Virtual Machine module, HYP part
#
obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o

View File

@ -0,0 +1,140 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include "hyp.h"
#define read_debug(r,n) read_sysreg(r##n##_el1)
#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)
#define save_debug(ptr,reg,nr) \
switch (nr) { \
case 15: ptr[15] = read_debug(reg, 15); \
case 14: ptr[14] = read_debug(reg, 14); \
case 13: ptr[13] = read_debug(reg, 13); \
case 12: ptr[12] = read_debug(reg, 12); \
case 11: ptr[11] = read_debug(reg, 11); \
case 10: ptr[10] = read_debug(reg, 10); \
case 9: ptr[9] = read_debug(reg, 9); \
case 8: ptr[8] = read_debug(reg, 8); \
case 7: ptr[7] = read_debug(reg, 7); \
case 6: ptr[6] = read_debug(reg, 6); \
case 5: ptr[5] = read_debug(reg, 5); \
case 4: ptr[4] = read_debug(reg, 4); \
case 3: ptr[3] = read_debug(reg, 3); \
case 2: ptr[2] = read_debug(reg, 2); \
case 1: ptr[1] = read_debug(reg, 1); \
default: ptr[0] = read_debug(reg, 0); \
}
#define restore_debug(ptr,reg,nr) \
switch (nr) { \
case 15: write_debug(ptr[15], reg, 15); \
case 14: write_debug(ptr[14], reg, 14); \
case 13: write_debug(ptr[13], reg, 13); \
case 12: write_debug(ptr[12], reg, 12); \
case 11: write_debug(ptr[11], reg, 11); \
case 10: write_debug(ptr[10], reg, 10); \
case 9: write_debug(ptr[9], reg, 9); \
case 8: write_debug(ptr[8], reg, 8); \
case 7: write_debug(ptr[7], reg, 7); \
case 6: write_debug(ptr[6], reg, 6); \
case 5: write_debug(ptr[5], reg, 5); \
case 4: write_debug(ptr[4], reg, 4); \
case 3: write_debug(ptr[3], reg, 3); \
case 2: write_debug(ptr[2], reg, 2); \
case 1: write_debug(ptr[1], reg, 1); \
default: write_debug(ptr[0], reg, 0); \
}
void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
return;
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
wrps = (aa64dfr0 >> 20) & 0xf;
save_debug(dbg->dbg_bcr, dbgbcr, brps);
save_debug(dbg->dbg_bvr, dbgbvr, brps);
save_debug(dbg->dbg_wcr, dbgwcr, wrps);
save_debug(dbg->dbg_wvr, dbgwvr, wrps);
ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
}
void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
return;
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
wrps = (aa64dfr0 >> 20) & 0xf;
restore_debug(dbg->dbg_bcr, dbgbcr, brps);
restore_debug(dbg->dbg_bvr, dbgbvr, brps);
restore_debug(dbg->dbg_wcr, dbgwcr, wrps);
restore_debug(dbg->dbg_wvr, dbgwvr, wrps);
write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
}
void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
{
/* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform
* a full save/restore cycle. */
if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) ||
(vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
__debug_save_state(vcpu, &vcpu->arch.host_debug_state,
kern_hyp_va(vcpu->arch.host_cpu_context));
}
void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
{
__debug_restore_state(vcpu, &vcpu->arch.host_debug_state,
kern_hyp_va(vcpu->arch.host_cpu_context));
if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
}
static u32 __hyp_text __debug_read_mdcr_el2(void)
{
return read_sysreg(mdcr_el2);
}
__alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void);

160
arch/arm64/kvm/hyp/entry.S Normal file
View File

@ -0,0 +1,160 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/fpsimdmacros.h>
#include <asm/kvm.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
.text
.pushsection .hyp.text, "ax"
.macro save_callee_saved_regs ctxt
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
.endm
.macro restore_callee_saved_regs ctxt
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
.endm
/*
* u64 __guest_enter(struct kvm_vcpu *vcpu,
* struct kvm_cpu_context *host_ctxt);
*/
ENTRY(__guest_enter)
// x0: vcpu
// x1: host/guest context
// x2-x18: clobbered by macros
// Store the host regs
save_callee_saved_regs x1
// Preserve vcpu & host_ctxt for use at exit time
stp x0, x1, [sp, #-16]!
add x1, x0, #VCPU_CONTEXT
// Prepare x0-x1 for later restore by pushing them onto the stack
ldp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
stp x2, x3, [sp, #-16]!
// x2-x18
ldp x2, x3, [x1, #CPU_XREG_OFFSET(2)]
ldp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
ldp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
ldp x8, x9, [x1, #CPU_XREG_OFFSET(8)]
ldp x10, x11, [x1, #CPU_XREG_OFFSET(10)]
ldp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
ldp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
ldp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
ldr x18, [x1, #CPU_XREG_OFFSET(18)]
// x19-x29, lr
restore_callee_saved_regs x1
// Last bits of the 64bit state
ldp x0, x1, [sp], #16
// Do not touch any register after this!
eret
ENDPROC(__guest_enter)
ENTRY(__guest_exit)
// x0: vcpu
// x1: return code
// x2-x3: free
// x4-x29,lr: vcpu regs
// vcpu x0-x3 on the stack
add x2, x0, #VCPU_CONTEXT
stp x4, x5, [x2, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x2, #CPU_XREG_OFFSET(6)]
stp x8, x9, [x2, #CPU_XREG_OFFSET(8)]
stp x10, x11, [x2, #CPU_XREG_OFFSET(10)]
stp x12, x13, [x2, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x2, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x2, #CPU_XREG_OFFSET(16)]
str x18, [x2, #CPU_XREG_OFFSET(18)]
ldp x6, x7, [sp], #16 // x2, x3
ldp x4, x5, [sp], #16 // x0, x1
stp x4, x5, [x2, #CPU_XREG_OFFSET(0)]
stp x6, x7, [x2, #CPU_XREG_OFFSET(2)]
save_callee_saved_regs x2
// Restore vcpu & host_ctxt from the stack
// (preserving return code in x1)
ldp x0, x2, [sp], #16
// Now restore the host regs
restore_callee_saved_regs x2
mov x0, x1
ret
ENDPROC(__guest_exit)
ENTRY(__fpsimd_guest_restore)
stp x4, lr, [sp, #-16]!
mrs x2, cptr_el2
bic x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
isb
mrs x3, tpidr_el2
ldr x0, [x3, #VCPU_HOST_CONTEXT]
kern_hyp_va x0
add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
bl __fpsimd_save_state
add x2, x3, #VCPU_CONTEXT
add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
bl __fpsimd_restore_state
// Skip restoring fpexc32 for AArch64 guests
mrs x1, hcr_el2
tbnz x1, #HCR_RW_SHIFT, 1f
ldr x4, [x3, #VCPU_FPEXC32_EL2]
msr fpexc32_el2, x4
1:
ldp x4, lr, [sp], #16
ldp x2, x3, [sp], #16
ldp x0, x1, [sp], #16
eret
ENDPROC(__fpsimd_guest_restore)

View File

@ -0,0 +1,33 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/linkage.h>
#include <asm/fpsimdmacros.h>
.text
.pushsection .hyp.text, "ax"
ENTRY(__fpsimd_save_state)
fpsimd_save x0, 1
ret
ENDPROC(__fpsimd_save_state)
ENTRY(__fpsimd_restore_state)
fpsimd_restore x0, 1
ret
ENDPROC(__fpsimd_restore_state)

View File

@ -0,0 +1,212 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
.text
.pushsection .hyp.text, "ax"
.macro save_x0_to_x3
stp x0, x1, [sp, #-16]!
stp x2, x3, [sp, #-16]!
.endm
.macro restore_x0_to_x3
ldp x2, x3, [sp], #16
ldp x0, x1, [sp], #16
.endm
el1_sync: // Guest trapped into EL2
save_x0_to_x3
mrs x1, esr_el2
lsr x2, x1, #ESR_ELx_EC_SHIFT
cmp x2, #ESR_ELx_EC_HVC64
b.ne el1_trap
mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest
cbnz x3, el1_trap // called HVC
/* Here, we're pretty sure the host called HVC. */
restore_x0_to_x3
/* Check for __hyp_get_vectors */
cbnz x0, 1f
mrs x0, vbar_el2
b 2f
1: stp lr, xzr, [sp, #-16]!
/*
* Compute the function address in EL2, and shuffle the parameters.
*/
kern_hyp_va x0
mov lr, x0
mov x0, x1
mov x1, x2
mov x2, x3
blr lr
ldp lr, xzr, [sp], #16
2: eret
el1_trap:
/*
* x1: ESR
* x2: ESR_EC
*/
/* Guest accessed VFP/SIMD registers, save host, restore Guest */
cmp x2, #ESR_ELx_EC_FP_ASIMD
b.eq __fpsimd_guest_restore
cmp x2, #ESR_ELx_EC_DABT_LOW
mov x0, #ESR_ELx_EC_IABT_LOW
ccmp x2, x0, #4, ne
b.ne 1f // Not an abort we care about
/* This is an abort. Check for permission fault */
alternative_if_not ARM64_WORKAROUND_834220
and x2, x1, #ESR_ELx_FSC_TYPE
cmp x2, #FSC_PERM
b.ne 1f // Not a permission fault
alternative_else
nop // Use the permission fault path to
nop // check for a valid S1 translation,
nop // regardless of the ESR value.
alternative_endif
/*
* Check for Stage-1 page table walk, which is guaranteed
* to give a valid HPFAR_EL2.
*/
tbnz x1, #7, 1f // S1PTW is set
/* Preserve PAR_EL1 */
mrs x3, par_el1
stp x3, xzr, [sp, #-16]!
/*
* Permission fault, HPFAR_EL2 is invalid.
* Resolve the IPA the hard way using the guest VA.
* Stage-1 translation already validated the memory access rights.
* As such, we can use the EL1 translation regime, and don't have
* to distinguish between EL0 and EL1 access.
*/
mrs x2, far_el2
at s1e1r, x2
isb
/* Read result */
mrs x3, par_el1
ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack
msr par_el1, x0
tbnz x3, #0, 3f // Bail out if we failed the translation
ubfx x3, x3, #12, #36 // Extract IPA
lsl x3, x3, #4 // and present it like HPFAR
b 2f
1: mrs x3, hpfar_el2
mrs x2, far_el2
2: mrs x0, tpidr_el2
str w1, [x0, #VCPU_ESR_EL2]
str x2, [x0, #VCPU_FAR_EL2]
str x3, [x0, #VCPU_HPFAR_EL2]
mov x1, #ARM_EXCEPTION_TRAP
b __guest_exit
/*
* Translation failed. Just return to the guest and
* let it fault again. Another CPU is probably playing
* behind our back.
*/
3: restore_x0_to_x3
eret
el1_irq:
save_x0_to_x3
mrs x0, tpidr_el2
mov x1, #ARM_EXCEPTION_IRQ
b __guest_exit
ENTRY(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
ldr lr, =panic
msr elr_el2, lr
eret
ENDPROC(__hyp_do_panic)
.macro invalid_vector label, target = __hyp_panic
.align 2
\label:
b \target
ENDPROC(\label)
.endm
/* None of these should ever happen */
invalid_vector el2t_sync_invalid
invalid_vector el2t_irq_invalid
invalid_vector el2t_fiq_invalid
invalid_vector el2t_error_invalid
invalid_vector el2h_sync_invalid
invalid_vector el2h_irq_invalid
invalid_vector el2h_fiq_invalid
invalid_vector el2h_error_invalid
invalid_vector el1_sync_invalid
invalid_vector el1_irq_invalid
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid
.ltorg
.align 11
ENTRY(__kvm_hyp_vector)
ventry el2t_sync_invalid // Synchronous EL2t
ventry el2t_irq_invalid // IRQ EL2t
ventry el2t_fiq_invalid // FIQ EL2t
ventry el2t_error_invalid // Error EL2t
ventry el2h_sync_invalid // Synchronous EL2h
ventry el2h_irq_invalid // IRQ EL2h
ventry el2h_fiq_invalid // FIQ EL2h
ventry el2h_error_invalid // Error EL2h
ventry el1_sync // Synchronous 64-bit EL1
ventry el1_irq // IRQ 64-bit EL1
ventry el1_fiq_invalid // FIQ 64-bit EL1
ventry el1_error_invalid // Error 64-bit EL1
ventry el1_sync // Synchronous 32-bit EL1
ventry el1_irq // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)

90
arch/arm64/kvm/hyp/hyp.h Normal file
View File

@ -0,0 +1,90 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_KVM_HYP_H__
#define __ARM64_KVM_HYP_H__
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
#define __hyp_text __section(.hyp.text) notrace
#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK)
#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \
+ PAGE_OFFSET)
/**
* hyp_alternate_select - Generates patchable code sequences that are
* used to switch between two implementations of a function, depending
* on the availability of a feature.
*
* @fname: a symbol name that will be defined as a function returning a
* function pointer whose type will match @orig and @alt
* @orig: A pointer to the default function, as returned by @fname when
* @cond doesn't hold
* @alt: A pointer to the alternate function, as returned by @fname
* when @cond holds
* @cond: a CPU feature (as described in asm/cpufeature.h)
*/
#define hyp_alternate_select(fname, orig, alt, cond) \
typeof(orig) * __hyp_text fname(void) \
{ \
typeof(alt) *val = orig; \
asm volatile(ALTERNATIVE("nop \n", \
"mov %0, %1 \n", \
cond) \
: "+r" (val) : "r" (alt)); \
return val; \
}
void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
void __timer_save_state(struct kvm_vcpu *vcpu);
void __timer_restore_state(struct kvm_vcpu *vcpu);
void __sysreg_save_state(struct kvm_cpu_context *ctxt);
void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
void __sysreg32_save_state(struct kvm_vcpu *vcpu);
void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
void __debug_save_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt);
void __debug_restore_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt);
void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
static inline bool __fpsimd_enabled(void)
{
return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
}
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
#endif /* __ARM64_KVM_HYP_H__ */

175
arch/arm64/kvm/hyp/switch.c Normal file
View File

@ -0,0 +1,175 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "hyp.h"
static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
/*
* We are about to set CPTR_EL2.TFP to trap all floating point
* register accesses to EL2, however, the ARM ARM clearly states that
* traps are only taken to EL2 if the operation would not otherwise
* trap to EL1. Therefore, always make sure that for 32-bit guests,
* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
*/
val = vcpu->arch.hcr_el2;
if (!(val & HCR_RW)) {
write_sysreg(1 << 30, fpexc32_el2);
isb();
}
write_sysreg(val, hcr_el2);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
}
static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
{
write_sysreg(HCR_RW, hcr_el2);
write_sysreg(0, hstr_el2);
write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
write_sysreg(0, cptr_el2);
}
static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
}
static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
{
write_sysreg(0, vttbr_el2);
}
static hyp_alternate_select(__vgic_call_save_state,
__vgic_v2_save_state, __vgic_v3_save_state,
ARM64_HAS_SYSREG_GIC_CPUIF);
static hyp_alternate_select(__vgic_call_restore_state,
__vgic_v2_restore_state, __vgic_v3_restore_state,
ARM64_HAS_SYSREG_GIC_CPUIF);
static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
{
__vgic_call_save_state()(vcpu);
write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
}
static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
{
u64 val;
val = read_sysreg(hcr_el2);
val |= HCR_INT_OVERRIDE;
val |= vcpu->arch.irq_lines;
write_sysreg(val, hcr_el2);
__vgic_call_restore_state()(vcpu);
}
static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
bool fp_enabled;
u64 exit_code;
vcpu = kern_hyp_va(vcpu);
write_sysreg(vcpu, tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
guest_ctxt = &vcpu->arch.ctxt;
__sysreg_save_state(host_ctxt);
__debug_cond_save_host_state(vcpu);
__activate_traps(vcpu);
__activate_vm(vcpu);
__vgic_restore_state(vcpu);
__timer_restore_state(vcpu);
/*
* We must restore the 32-bit state before the sysregs, thanks
* to Cortex-A57 erratum #852523.
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_state(guest_ctxt);
__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
/* Jump in the fire! */
exit_code = __guest_enter(vcpu, host_ctxt);
/* And we're baaack! */
fp_enabled = __fpsimd_enabled();
__sysreg_save_state(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_save_state(vcpu);
__vgic_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__sysreg_restore_state(host_ctxt);
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
}
__debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
__debug_cond_restore_host_state(vcpu);
return exit_code;
}
__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
void __hyp_text __noreturn __hyp_panic(void)
{
unsigned long str_va = (unsigned long)__hyp_panic_string;
u64 spsr = read_sysreg(spsr_el2);
u64 elr = read_sysreg(elr_el2);
u64 par = read_sysreg(par_el1);
if (read_sysreg(vttbr_el2)) {
struct kvm_vcpu *vcpu;
struct kvm_cpu_context *host_ctxt;
vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__sysreg_restore_state(host_ctxt);
}
/* Call panic for real */
__hyp_do_panic(hyp_kern_va(str_va),
spsr, elr,
read_sysreg(esr_el2), read_sysreg(far_el2),
read_sysreg(hpfar_el2), par,
(void *)read_sysreg(tpidr_el2));
unreachable();
}

View File

@ -0,0 +1,138 @@
/*
* Copyright (C) 2012-2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include "hyp.h"
/* ctxt is already in the HYP VA space */
void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1);
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1);
ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1);
ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1);
ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1);
ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1);
ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1);
ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1);
ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1);
ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1);
ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1);
ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1);
ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
ctxt->gp_regs.regs.pc = read_sysreg(elr_el2);
ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1);
}
void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1);
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1);
write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1);
write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1);
write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1);
write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1);
write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1);
write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1);
write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1);
write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1);
write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1);
write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1);
write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
write_sysreg(ctxt->gp_regs.regs.pc, elr_el2);
write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg(ctxt->gp_regs.elr_el1, elr_el1);
write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1);
}
void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
{
u64 *spsr, *sysreg;
if (read_sysreg(hcr_el2) & HCR_RW)
return;
spsr = vcpu->arch.ctxt.gp_regs.spsr;
sysreg = vcpu->arch.ctxt.sys_regs;
spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt);
spsr[KVM_SPSR_UND] = read_sysreg(spsr_und);
spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq);
spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq);
sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
if (__fpsimd_enabled())
sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
}
void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
{
u64 *spsr, *sysreg;
if (read_sysreg(hcr_el2) & HCR_RW)
return;
spsr = vcpu->arch.ctxt.gp_regs.spsr;
sysreg = vcpu->arch.ctxt.sys_regs;
write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt);
write_sysreg(spsr[KVM_SPSR_UND], spsr_und);
write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq);
write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq);
write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
}

View File

@ -0,0 +1,71 @@
/*
* Copyright (C) 2012-2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <clocksource/arm_arch_timer.h>
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include "hyp.h"
/* vcpu is already in the HYP VA space */
void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
u64 val;
if (kvm->arch.timer.enabled) {
timer->cntv_ctl = read_sysreg(cntv_ctl_el0);
timer->cntv_cval = read_sysreg(cntv_cval_el0);
}
/* Disable the virtual timer */
write_sysreg(0, cntv_ctl_el0);
/* Allow physical timer/counter access for the host */
val = read_sysreg(cnthctl_el2);
val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
write_sysreg(val, cnthctl_el2);
/* Clear cntvoff for the host */
write_sysreg(0, cntvoff_el2);
}
void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
u64 val;
/*
* Disallow physical timer access for the guest
* Physical counter access is allowed
*/
val = read_sysreg(cnthctl_el2);
val &= ~CNTHCTL_EL1PCEN;
val |= CNTHCTL_EL1PCTEN;
write_sysreg(val, cnthctl_el2);
if (kvm->arch.timer.enabled) {
write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
write_sysreg(timer->cntv_cval, cntv_cval_el0);
isb();
write_sysreg(timer->cntv_ctl, cntv_ctl_el0);
}
}

80
arch/arm64/kvm/hyp/tlb.c Normal file
View File

@ -0,0 +1,80 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "hyp.h"
static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
dsb(ishst);
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();
/*
* We could do so much better if we had the VA as well.
* Instead, we invalidate Stage-2 for this IPA, and the
* whole of Stage-1. Weep...
*/
ipa >>= 12;
asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa));
/*
* We have to ensure completion of the invalidation at Stage-2,
* since a table walk on another CPU could refill a TLB with a
* complete (S1 + S2) walk based on the old Stage-2 mapping if
* the Stage-1 invalidation happened first.
*/
dsb(ish);
asm volatile("tlbi vmalle1is" : : );
dsb(ish);
isb();
write_sysreg(0, vttbr_el2);
}
__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm,
phys_addr_t ipa);
static void __hyp_text __tlb_flush_vmid(struct kvm *kvm)
{
dsb(ishst);
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
isb();
asm volatile("tlbi vmalls12e1is" : : );
dsb(ish);
isb();
write_sysreg(0, vttbr_el2);
}
__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm);
static void __hyp_text __tlb_flush_vm_context(void)
{
dsb(ishst);
asm volatile("tlbi alle1is \n"
"ic ialluis ": : );
dsb(ish);
}
__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void);

View File

@ -0,0 +1,84 @@
/*
* Copyright (C) 2012-2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/compiler.h>
#include <linux/irqchip/arm-gic.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include "hyp.h"
/* vcpu is already in the HYP VA space */
void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
struct vgic_dist *vgic = &kvm->arch.vgic;
void __iomem *base = kern_hyp_va(vgic->vctrl_base);
u32 eisr0, eisr1, elrsr0, elrsr1;
int i, nr_lr;
if (!base)
return;
nr_lr = vcpu->arch.vgic_cpu.nr_lr;
cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
eisr0 = readl_relaxed(base + GICH_EISR0);
elrsr0 = readl_relaxed(base + GICH_ELRSR0);
if (unlikely(nr_lr > 32)) {
eisr1 = readl_relaxed(base + GICH_EISR1);
elrsr1 = readl_relaxed(base + GICH_ELRSR1);
} else {
eisr1 = elrsr1 = 0;
}
#ifdef CONFIG_CPU_BIG_ENDIAN
cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
#else
cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
#endif
cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
writel_relaxed(0, base + GICH_HCR);
for (i = 0; i < nr_lr; i++)
cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
}
/* vcpu is already in the HYP VA space */
void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
struct vgic_dist *vgic = &kvm->arch.vgic;
void __iomem *base = kern_hyp_va(vgic->vctrl_base);
int i, nr_lr;
if (!base)
return;
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
nr_lr = vcpu->arch.vgic_cpu.nr_lr;
for (i = 0; i < nr_lr; i++)
writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
}

View File

@ -0,0 +1,228 @@
/*
* Copyright (C) 2012-2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/compiler.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include "hyp.h"
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1)
#define read_gicreg(r) \
({ \
u64 reg; \
asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \
reg; \
})
#define write_gicreg(v,r) \
do { \
u64 __val = (v); \
asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
} while (0)
/* vcpu is already in the HYP VA space */
void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
u32 max_lr_idx, nr_pri_bits;
/*
* Make sure stores to the GIC via the memory mapped interface
* are now visible to the system register interface.
*/
dsb(st);
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
write_gicreg(0, ICH_HCR_EL2);
val = read_gicreg(ICH_VTR_EL2);
max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
switch (max_lr_idx) {
case 15:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
case 14:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
case 13:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
case 12:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
case 11:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
case 10:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
case 9:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
case 8:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
case 7:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
case 6:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
case 5:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
case 4:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
case 3:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
case 2:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
case 1:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
case 0:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
}
switch (nr_pri_bits) {
case 7:
cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
case 6:
cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
default:
cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
}
switch (nr_pri_bits) {
case 7:
cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
case 6:
cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
default:
cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
}
val = read_gicreg(ICC_SRE_EL2);
write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
write_gicreg(1, ICC_SRE_EL1);
}
void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
u32 max_lr_idx, nr_pri_bits;
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
* Group0 interrupt (as generated in GICv2 mode) to be
* delivered as a FIQ to the guest, with potentially fatal
* consequences. So we must make sure that ICC_SRE_EL1 has
* been actually programmed with the value we want before
* starting to mess with the rest of the GIC.
*/
write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
isb();
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
val = read_gicreg(ICH_VTR_EL2);
max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
switch (nr_pri_bits) {
case 7:
write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
case 6:
write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
default:
write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
}
switch (nr_pri_bits) {
case 7:
write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
case 6:
write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
default:
write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
}
switch (max_lr_idx) {
case 15:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
case 14:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2);
case 13:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2);
case 12:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2);
case 11:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2);
case 10:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2);
case 9:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2);
case 8:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2);
case 7:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2);
case 6:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2);
case 5:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2);
case 4:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2);
case 3:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2);
case 2:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2);
case 1:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
case 0:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
}
/*
* Ensures that the above will have reached the
* (re)distributors. This ensure the guest will read the
* correct values from the memory-mapped interface.
*/
isb();
dsb(sy);
/*
* Prevent the guest from touching the GIC system registers if
* SRE isn't enabled for GICv3 emulation.
*/
if (!cpu_if->vgic_sre) {
write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
ICC_SRE_EL2);
}
}
static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void)
{
return read_gicreg(ICH_VTR_EL2);
}
__alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void);

View File

@ -29,6 +29,7 @@
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
@ -219,9 +220,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
* All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
* hyp.S code switches between host and guest values in future.
*/
static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
u64 *dbg_reg)
static void reg_to_dbg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
u64 *dbg_reg)
{
u64 val = p->regval;
@ -234,18 +235,18 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
}
static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
u64 *dbg_reg)
static void dbg_to_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
u64 *dbg_reg)
{
p->regval = *dbg_reg;
if (p->is_32bit)
p->regval &= 0xffffffffUL;
}
static inline bool trap_bvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
static bool trap_bvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
@ -279,15 +280,15 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}
static inline void reset_bvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
static void reset_bvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
}
static inline bool trap_bcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
static bool trap_bcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
@ -322,15 +323,15 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}
static inline void reset_bcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
static void reset_bcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
}
static inline bool trap_wvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
static bool trap_wvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
@ -365,15 +366,15 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}
static inline void reset_wvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
static void reset_wvr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
}
static inline bool trap_wcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
static bool trap_wcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
@ -407,8 +408,8 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return 0;
}
static inline void reset_wcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
static void reset_wcr(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
}
@ -722,9 +723,9 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
* system is in.
*/
static inline bool trap_xvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
static bool trap_xvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];

View File

@ -1,134 +0,0 @@
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/linkage.h>
#include <linux/irqchip/arm-gic.h>
#include <asm/assembler.h>
#include <asm/memory.h>
#include <asm/asm-offsets.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
.text
.pushsection .hyp.text, "ax"
/*
* Save the VGIC CPU state into memory
* x0: Register pointing to VCPU struct
* Do not corrupt x1!!!
*/
ENTRY(__save_vgic_v2_state)
__save_vgic_v2_state:
/* Get VGIC VCTRL base into x2 */
ldr x2, [x0, #VCPU_KVM]
kern_hyp_va x2
ldr x2, [x2, #KVM_VGIC_VCTRL]
kern_hyp_va x2
cbz x2, 2f // disabled
/* Compute the address of struct vgic_cpu */
add x3, x0, #VCPU_VGIC_CPU
/* Save all interesting registers */
ldr w5, [x2, #GICH_VMCR]
ldr w6, [x2, #GICH_MISR]
ldr w7, [x2, #GICH_EISR0]
ldr w8, [x2, #GICH_EISR1]
ldr w9, [x2, #GICH_ELRSR0]
ldr w10, [x2, #GICH_ELRSR1]
ldr w11, [x2, #GICH_APR]
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
CPU_BE( rev w8, w8 )
CPU_BE( rev w9, w9 )
CPU_BE( rev w10, w10 )
CPU_BE( rev w11, w11 )
str w5, [x3, #VGIC_V2_CPU_VMCR]
str w6, [x3, #VGIC_V2_CPU_MISR]
CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] )
CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] )
CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] )
CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] )
CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] )
CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] )
str w11, [x3, #VGIC_V2_CPU_APR]
/* Clear GICH_HCR */
str wzr, [x2, #GICH_HCR]
/* Save list registers */
add x2, x2, #GICH_LR0
ldr w4, [x3, #VGIC_CPU_NR_LR]
add x3, x3, #VGIC_V2_CPU_LR
1: ldr w5, [x2], #4
CPU_BE( rev w5, w5 )
str w5, [x3], #4
sub w4, w4, #1
cbnz w4, 1b
2:
ret
ENDPROC(__save_vgic_v2_state)
/*
* Restore the VGIC CPU state from memory
* x0: Register pointing to VCPU struct
*/
ENTRY(__restore_vgic_v2_state)
__restore_vgic_v2_state:
/* Get VGIC VCTRL base into x2 */
ldr x2, [x0, #VCPU_KVM]
kern_hyp_va x2
ldr x2, [x2, #KVM_VGIC_VCTRL]
kern_hyp_va x2
cbz x2, 2f // disabled
/* Compute the address of struct vgic_cpu */
add x3, x0, #VCPU_VGIC_CPU
/* We only restore a minimal set of registers */
ldr w4, [x3, #VGIC_V2_CPU_HCR]
ldr w5, [x3, #VGIC_V2_CPU_VMCR]
ldr w6, [x3, #VGIC_V2_CPU_APR]
CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
str w4, [x2, #GICH_HCR]
str w5, [x2, #GICH_VMCR]
str w6, [x2, #GICH_APR]
/* Restore list registers */
add x2, x2, #GICH_LR0
ldr w4, [x3, #VGIC_CPU_NR_LR]
add x3, x3, #VGIC_V2_CPU_LR
1: ldr w5, [x3], #4
CPU_BE( rev w5, w5 )
str w5, [x2], #4
sub w4, w4, #1
cbnz w4, 1b
2:
ret
ENDPROC(__restore_vgic_v2_state)
.popsection

View File

@ -1,269 +0,0 @@
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/linkage.h>
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
#include <asm/memory.h>
#include <asm/asm-offsets.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
.text
.pushsection .hyp.text, "ax"
/*
* We store LRs in reverse order to let the CPU deal with streaming
* access. Use this macro to make it look saner...
*/
#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8)
/*
* Save the VGIC CPU state into memory
* x0: Register pointing to VCPU struct
* Do not corrupt x1!!!
*/
.macro save_vgic_v3_state
// Compute the address of struct vgic_cpu
add x3, x0, #VCPU_VGIC_CPU
// Make sure stores to the GIC via the memory mapped interface
// are now visible to the system register interface
dsb st
// Save all interesting registers
mrs_s x5, ICH_VMCR_EL2
mrs_s x6, ICH_MISR_EL2
mrs_s x7, ICH_EISR_EL2
mrs_s x8, ICH_ELSR_EL2
str w5, [x3, #VGIC_V3_CPU_VMCR]
str w6, [x3, #VGIC_V3_CPU_MISR]
str w7, [x3, #VGIC_V3_CPU_EISR]
str w8, [x3, #VGIC_V3_CPU_ELRSR]
msr_s ICH_HCR_EL2, xzr
mrs_s x21, ICH_VTR_EL2
mvn w22, w21
ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4
adr x24, 1f
add x24, x24, x23
br x24
1:
mrs_s x20, ICH_LR15_EL2
mrs_s x19, ICH_LR14_EL2
mrs_s x18, ICH_LR13_EL2
mrs_s x17, ICH_LR12_EL2
mrs_s x16, ICH_LR11_EL2
mrs_s x15, ICH_LR10_EL2
mrs_s x14, ICH_LR9_EL2
mrs_s x13, ICH_LR8_EL2
mrs_s x12, ICH_LR7_EL2
mrs_s x11, ICH_LR6_EL2
mrs_s x10, ICH_LR5_EL2
mrs_s x9, ICH_LR4_EL2
mrs_s x8, ICH_LR3_EL2
mrs_s x7, ICH_LR2_EL2
mrs_s x6, ICH_LR1_EL2
mrs_s x5, ICH_LR0_EL2
adr x24, 1f
add x24, x24, x23
br x24
1:
str x20, [x3, #LR_OFFSET(15)]
str x19, [x3, #LR_OFFSET(14)]
str x18, [x3, #LR_OFFSET(13)]
str x17, [x3, #LR_OFFSET(12)]
str x16, [x3, #LR_OFFSET(11)]
str x15, [x3, #LR_OFFSET(10)]
str x14, [x3, #LR_OFFSET(9)]
str x13, [x3, #LR_OFFSET(8)]
str x12, [x3, #LR_OFFSET(7)]
str x11, [x3, #LR_OFFSET(6)]
str x10, [x3, #LR_OFFSET(5)]
str x9, [x3, #LR_OFFSET(4)]
str x8, [x3, #LR_OFFSET(3)]
str x7, [x3, #LR_OFFSET(2)]
str x6, [x3, #LR_OFFSET(1)]
str x5, [x3, #LR_OFFSET(0)]
tbnz w21, #29, 6f // 6 bits
tbz w21, #30, 5f // 5 bits
// 7 bits
mrs_s x20, ICH_AP0R3_EL2
str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
mrs_s x19, ICH_AP0R2_EL2
str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
6: mrs_s x18, ICH_AP0R1_EL2
str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
5: mrs_s x17, ICH_AP0R0_EL2
str w17, [x3, #VGIC_V3_CPU_AP0R]
tbnz w21, #29, 6f // 6 bits
tbz w21, #30, 5f // 5 bits
// 7 bits
mrs_s x20, ICH_AP1R3_EL2
str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
mrs_s x19, ICH_AP1R2_EL2
str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
6: mrs_s x18, ICH_AP1R1_EL2
str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
5: mrs_s x17, ICH_AP1R0_EL2
str w17, [x3, #VGIC_V3_CPU_AP1R]
// Restore SRE_EL1 access and re-enable SRE at EL1.
mrs_s x5, ICC_SRE_EL2
orr x5, x5, #ICC_SRE_EL2_ENABLE
msr_s ICC_SRE_EL2, x5
isb
mov x5, #1
msr_s ICC_SRE_EL1, x5
.endm
/*
* Restore the VGIC CPU state from memory
* x0: Register pointing to VCPU struct
*/
.macro restore_vgic_v3_state
// Compute the address of struct vgic_cpu
add x3, x0, #VCPU_VGIC_CPU
// Restore all interesting registers
ldr w4, [x3, #VGIC_V3_CPU_HCR]
ldr w5, [x3, #VGIC_V3_CPU_VMCR]
ldr w25, [x3, #VGIC_V3_CPU_SRE]
msr_s ICC_SRE_EL1, x25
// make sure SRE is valid before writing the other registers
isb
msr_s ICH_HCR_EL2, x4
msr_s ICH_VMCR_EL2, x5
mrs_s x21, ICH_VTR_EL2
tbnz w21, #29, 6f // 6 bits
tbz w21, #30, 5f // 5 bits
// 7 bits
ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
msr_s ICH_AP1R3_EL2, x20
ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
msr_s ICH_AP1R2_EL2, x19
6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
msr_s ICH_AP1R1_EL2, x18
5: ldr w17, [x3, #VGIC_V3_CPU_AP1R]
msr_s ICH_AP1R0_EL2, x17
tbnz w21, #29, 6f // 6 bits
tbz w21, #30, 5f // 5 bits
// 7 bits
ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
msr_s ICH_AP0R3_EL2, x20
ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
msr_s ICH_AP0R2_EL2, x19
6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
msr_s ICH_AP0R1_EL2, x18
5: ldr w17, [x3, #VGIC_V3_CPU_AP0R]
msr_s ICH_AP0R0_EL2, x17
and w22, w21, #0xf
mvn w22, w21
ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4
adr x24, 1f
add x24, x24, x23
br x24
1:
ldr x20, [x3, #LR_OFFSET(15)]
ldr x19, [x3, #LR_OFFSET(14)]
ldr x18, [x3, #LR_OFFSET(13)]
ldr x17, [x3, #LR_OFFSET(12)]
ldr x16, [x3, #LR_OFFSET(11)]
ldr x15, [x3, #LR_OFFSET(10)]
ldr x14, [x3, #LR_OFFSET(9)]
ldr x13, [x3, #LR_OFFSET(8)]
ldr x12, [x3, #LR_OFFSET(7)]
ldr x11, [x3, #LR_OFFSET(6)]
ldr x10, [x3, #LR_OFFSET(5)]
ldr x9, [x3, #LR_OFFSET(4)]
ldr x8, [x3, #LR_OFFSET(3)]
ldr x7, [x3, #LR_OFFSET(2)]
ldr x6, [x3, #LR_OFFSET(1)]
ldr x5, [x3, #LR_OFFSET(0)]
adr x24, 1f
add x24, x24, x23
br x24
1:
msr_s ICH_LR15_EL2, x20
msr_s ICH_LR14_EL2, x19
msr_s ICH_LR13_EL2, x18
msr_s ICH_LR12_EL2, x17
msr_s ICH_LR11_EL2, x16
msr_s ICH_LR10_EL2, x15
msr_s ICH_LR9_EL2, x14
msr_s ICH_LR8_EL2, x13
msr_s ICH_LR7_EL2, x12
msr_s ICH_LR6_EL2, x11
msr_s ICH_LR5_EL2, x10
msr_s ICH_LR4_EL2, x9
msr_s ICH_LR3_EL2, x8
msr_s ICH_LR2_EL2, x7
msr_s ICH_LR1_EL2, x6
msr_s ICH_LR0_EL2, x5
// Ensure that the above will have reached the
// (re)distributors. This ensure the guest will read
// the correct values from the memory-mapped interface.
isb
dsb sy
// Prevent the guest from touching the GIC system registers
// if SRE isn't enabled for GICv3 emulation
cbnz x25, 1f
mrs_s x5, ICC_SRE_EL2
and x5, x5, #~ICC_SRE_EL2_ENABLE
msr_s ICC_SRE_EL2, x5
1:
.endm
ENTRY(__save_vgic_v3_state)
save_vgic_v3_state
ret
ENDPROC(__save_vgic_v3_state)
ENTRY(__restore_vgic_v3_state)
restore_vgic_v3_state
ret
ENDPROC(__restore_vgic_v3_state)
ENTRY(__vgic_v3_get_ich_vtr_el2)
mrs_s x0, ICH_VTR_EL2
ret
ENDPROC(__vgic_v3_get_ich_vtr_el2)
.popsection

View File

@ -50,6 +50,10 @@
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 256
/* PPC-specific vcpu->requests bit members */
#define KVM_REQ_WATCHDOG 8
#define KVM_REQ_EPR_EXIT 9
#include <linux/mmu_notifier.h>
#define KVM_ARCH_WANT_MMU_NOTIFIER

View File

@ -314,16 +314,10 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
int r;
struct kvm_vcpu *v, *ret = NULL;
struct kvm_vcpu *ret;
mutex_lock(&kvm->lock);
kvm_for_each_vcpu(r, v, kvm) {
if (v->vcpu_id == id) {
ret = v;
break;
}
}
ret = kvm_get_vcpu_by_id(kvm, id);
mutex_unlock(&kvm->lock);
return ret;
}

View File

@ -512,7 +512,7 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
put_page(hpage);
}
static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
@ -521,7 +521,7 @@ static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
gpa &= ~0xFFFULL;
if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) {
return 1;
return true;
}
return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);

View File

@ -104,6 +104,9 @@
#define HWCAP_S390_TE 1024
#define HWCAP_S390_VXRS 2048
/* Internal bits, not exposed via elf */
#define HWCAP_INT_SIE 1UL
/*
* These are used to set parameters in the core dumps.
*/
@ -169,6 +172,10 @@ extern unsigned int vdso_enabled;
extern unsigned long elf_hwcap;
#define ELF_HWCAP (elf_hwcap)
/* Internal hardware capabilities, not exposed via elf */
extern unsigned long int_hwcap;
/* This yields a string that ld.so will use to load implementation
specific libraries for optimization. This is more specific in
intent than poking at uname or /proc/cpuinfo.

View File

@ -25,7 +25,9 @@
#include <asm/fpu/api.h>
#include <asm/isc.h>
#define KVM_MAX_VCPUS 64
#define KVM_S390_BSCA_CPU_SLOTS 64
#define KVM_S390_ESCA_CPU_SLOTS 248
#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS
#define KVM_USER_MEM_SLOTS 32
/*
@ -37,12 +39,41 @@
#define KVM_IRQCHIP_NUM_PINS 4096
#define KVM_HALT_POLL_NS_DEFAULT 0
/* s390-specific vcpu->requests bit members */
#define KVM_REQ_ENABLE_IBS 8
#define KVM_REQ_DISABLE_IBS 9
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
struct sca_entry {
union bsca_sigp_ctrl {
__u8 value;
struct {
__u8 c : 1;
__u8 r : 1;
__u8 scn : 6;
};
} __packed;
union esca_sigp_ctrl {
__u16 value;
struct {
__u8 c : 1;
__u8 reserved: 7;
__u8 scn;
};
} __packed;
struct esca_entry {
union esca_sigp_ctrl sigp_ctrl;
__u16 reserved1[3];
__u64 sda;
__u64 reserved2[6];
} __packed;
struct bsca_entry {
__u8 reserved0;
__u8 sigp_ctrl;
union bsca_sigp_ctrl sigp_ctrl;
__u16 reserved[3];
__u64 sda;
__u64 reserved2[2];
@ -57,14 +88,22 @@ union ipte_control {
};
};
struct sca_block {
struct bsca_block {
union ipte_control ipte_control;
__u64 reserved[5];
__u64 mcn;
__u64 reserved2;
struct sca_entry cpu[64];
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
} __attribute__((packed));
struct esca_block {
union ipte_control ipte_control;
__u64 reserved1[7];
__u64 mcn[4];
__u64 reserved2[20];
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
} __packed;
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@ -182,7 +221,8 @@ struct kvm_s390_sie_block {
__u64 pp; /* 0x01de */
__u8 reserved1e6[2]; /* 0x01e6 */
__u64 itdba; /* 0x01e8 */
__u8 reserved1f0[16]; /* 0x01f0 */
__u64 riccbd; /* 0x01f0 */
__u8 reserved1f8[8]; /* 0x01f8 */
} __attribute__((packed));
struct kvm_s390_itdb {
@ -585,11 +625,14 @@ struct kvm_s390_crypto_cb {
};
struct kvm_arch{
struct sca_block *sca;
void *sca;
int use_esca;
rwlock_t sca_lock;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
struct kvm_device *flic;
struct gmap *gmap;
unsigned long mem_limit;
int css_support;
int use_irqchip;
int use_cmma;

View File

@ -29,7 +29,10 @@ struct sclp_ipl_info {
struct sclp_core_entry {
u8 core_id;
u8 reserved0[2];
u8 reserved0;
u8 : 4;
u8 sief2 : 1;
u8 : 3;
u8 : 3;
u8 siif : 1;
u8 sigpif : 1;
@ -53,6 +56,9 @@ struct sclp_info {
unsigned char has_sigpif : 1;
unsigned char has_core_type : 1;
unsigned char has_sprp : 1;
unsigned char has_hvs : 1;
unsigned char has_esca : 1;
unsigned char has_sief2 : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;

View File

@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req {
#define KVM_S390_VM_MEM_CLR_CMMA 1
#define KVM_S390_VM_MEM_LIMIT_SIZE 2
#define KVM_S390_NO_MEM_LIMIT U64_MAX
/* kvm attributes for KVM_S390_VM_TOD */
#define KVM_S390_VM_TOD_LOW 0
#define KVM_S390_VM_TOD_HIGH 1
@ -151,6 +153,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_ARCH0 (1UL << 4)
#define KVM_SYNC_PFAULT (1UL << 5)
#define KVM_SYNC_VRS (1UL << 6)
#define KVM_SYNC_RICCB (1UL << 7)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@ -168,6 +171,8 @@ struct kvm_sync_regs {
__u64 vrs[32][2]; /* vector registers */
__u8 reserved[512]; /* for future vector expansion */
__u32 fpc; /* only valid with vector registers */
__u8 padding[52]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
};
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)

View File

@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
"edat", "etf3eh", "highgprs", "te", "vx"
};
static const char * const int_hwcap_str[] = {
"sie"
};
unsigned long n = (unsigned long) v - 1;
int i;
@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
seq_printf(m, "%s ", hwcap_str[i]);
for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
seq_printf(m, "%s ", int_hwcap_str[i]);
seq_puts(m, "\n");
show_cacheinfo(m);
}

View File

@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq);
unsigned long elf_hwcap __read_mostly = 0;
char elf_platform[ELF_PLATFORM_SIZE];
unsigned long int_hwcap = 0;
int __initdata memory_end_set;
unsigned long __initdata memory_end;
unsigned long __initdata max_physmem_end;
@ -793,6 +795,13 @@ static int __init setup_hwcaps(void)
strcpy(elf_platform, "z13");
break;
}
/*
* Virtualization support HWCAP_INT_SIE is bit 0.
*/
if (sclp.has_sief2)
int_hwcap |= HWCAP_INT_SIE;
return 0;
}
arch_initcall(setup_hwcaps);

View File

@ -155,10 +155,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tcpu;
int tid;
int i;
tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
vcpu->stat.diagnose_9c++;
@ -167,12 +165,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
if (tid == vcpu->vcpu_id)
return 0;
kvm_for_each_vcpu(i, tcpu, kvm)
if (tcpu->vcpu_id == tid) {
kvm_vcpu_yield_to(tcpu);
break;
}
tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
if (tcpu)
kvm_vcpu_yield_to(tcpu);
return 0;
}

View File

@ -259,10 +259,14 @@ struct aste {
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
if (vcpu->arch.sie_block->eca & 1) {
int rc;
if (vcpu->arch.sie_block->eca & 1)
return ic->kh != 0;
read_lock(&vcpu->kvm->arch.sca_lock);
rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
read_unlock(&vcpu->kvm->arch.sca_lock);
return rc;
}
return vcpu->kvm->arch.ipte_lock_count != 0;
}
@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu)
vcpu->kvm->arch.ipte_lock_count++;
if (vcpu->kvm->arch.ipte_lock_count > 1)
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
retry:
read_lock(&vcpu->kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(vcpu->kvm);
do {
old = READ_ONCE(*ic);
while (old.k) {
if (old.k) {
read_unlock(&vcpu->kvm->arch.sca_lock);
cond_resched();
old = READ_ONCE(*ic);
goto retry;
}
new = old;
new.k = 1;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
read_unlock(&vcpu->kvm->arch.sca_lock);
out:
mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
}
@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
vcpu->kvm->arch.ipte_lock_count--;
if (vcpu->kvm->arch.ipte_lock_count)
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
read_lock(&vcpu->kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(vcpu->kvm);
do {
old = READ_ONCE(*ic);
new = old;
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
read_unlock(&vcpu->kvm->arch.sca_lock);
wake_up(&vcpu->kvm->arch.ipte_wq);
out:
mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
@ -311,24 +321,29 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
ic = &vcpu->kvm->arch.sca->ipte_control;
retry:
read_lock(&vcpu->kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(vcpu->kvm);
do {
old = READ_ONCE(*ic);
while (old.kg) {
if (old.kg) {
read_unlock(&vcpu->kvm->arch.sca_lock);
cond_resched();
old = READ_ONCE(*ic);
goto retry;
}
new = old;
new.k = 1;
new.kh++;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
ic = &vcpu->kvm->arch.sca->ipte_control;
read_lock(&vcpu->kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(vcpu->kvm);
do {
old = READ_ONCE(*ic);
new = old;
@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
if (!new.kh)
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
read_unlock(&vcpu->kvm->arch.sca_lock);
if (!new.kh)
wake_up(&vcpu->kvm->arch.ipte_wq);
}

View File

@ -54,9 +54,6 @@ void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
static int handle_noop(struct kvm_vcpu *vcpu)
{
switch (vcpu->arch.sie_block->icptcode) {
case 0x0:
vcpu->stat.exit_null++;
break;
case 0x10:
vcpu->stat.exit_external_request++;
break;
@ -338,8 +335,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
if (kvm_is_ucontrol(vcpu->kvm))
return -EOPNOTSUPP;
switch (vcpu->arch.sie_block->icptcode) {
case 0x00:
case 0x10:
case 0x18:
return handle_noop(vcpu);

View File

@ -34,6 +34,106 @@
#define PFAULT_DONE 0x0680
#define VIRTIO_PARAM 0x0d00
/* handle external calls via sigp interpretation facility */
static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
{
int c, scn;
if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
return 0;
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl sigp_ctrl =
sca->cpu[vcpu->vcpu_id].sigp_ctrl;
c = sigp_ctrl.c;
scn = sigp_ctrl.scn;
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl sigp_ctrl =
sca->cpu[vcpu->vcpu_id].sigp_ctrl;
c = sigp_ctrl.c;
scn = sigp_ctrl.scn;
}
read_unlock(&vcpu->kvm->arch.sca_lock);
if (src_id)
*src_id = scn;
return c;
}
static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
{
int expect, rc;
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
new_val.scn = src_id;
new_val.c = 1;
old_val.c = 0;
expect = old_val.value;
rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
new_val.scn = src_id;
new_val.c = 1;
old_val.c = 0;
expect = old_val.value;
rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
if (rc != expect) {
/* another external call is pending */
return -EBUSY;
}
atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
return 0;
}
static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc, expect;
atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
union esca_sigp_ctrl old = *sigp_ctrl;
expect = old.value;
rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
union bsca_sigp_ctrl old = *sigp_ctrl;
expect = old.value;
rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
WARN_ON(rc != expect); /* cannot clear? */
}
int psw_extint_disabled(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
@ -792,13 +892,11 @@ static const deliver_irq_t deliver_irq_funcs[] = {
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
if (!sclp.has_sigpif)
return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
return (sigp_ctrl & SIGP_CTRL_C) &&
(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND);
return sca_ext_call_pending(vcpu, NULL);
}
int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
@ -909,9 +1007,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
memset(&li->irq, 0, sizeof(li->irq));
spin_unlock(&li->lock);
/* clear pending external calls set by sigp interpretation facility */
atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
sca_clear_ext_call(vcpu);
}
int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
@ -1003,21 +1099,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
return 0;
}
static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
{
unsigned char new_val, old_val;
uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK);
old_val = *sigp_ctrl & ~SIGP_CTRL_C;
if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) {
/* another external call is pending */
return -EBUSY;
}
atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
return 0;
}
static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@ -1034,7 +1115,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
return -EINVAL;
if (sclp.has_sigpif)
return __inject_extcall_sigpif(vcpu, src_id);
return sca_inject_ext_call(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
return -EBUSY;
@ -2203,7 +2284,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li,
int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
{
uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
int scn;
unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
unsigned long pending_irqs;
@ -2243,14 +2324,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
}
}
if ((sigp_ctrl & SIGP_CTRL_C) &&
(atomic_read(&vcpu->arch.sie_block->cpuflags) &
CPUSTAT_ECALL_PEND)) {
if (sca_ext_call_pending(vcpu, &scn)) {
if (n + sizeof(irq) > len)
return -ENOBUFS;
memset(&irq, 0, sizeof(irq));
irq.type = KVM_S390_INT_EXTERNAL_CALL;
irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
irq.u.extcall.code = scn;
if (copy_to_user(&buf[n], &irq, sizeof(irq)))
return -EFAULT;
n += sizeof(irq);

View File

@ -246,7 +246,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
: KVM_S390_BSCA_CPU_SLOTS;
break;
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
@ -257,6 +258,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VECTOR_REGISTERS:
r = MACHINE_HAS_VX;
break;
case KVM_CAP_S390_RI:
r = test_facility(64);
break;
default:
r = 0;
}
@ -283,6 +287,8 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@ -355,6 +361,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
r ? "(not available)" : "(success)");
break;
case KVM_CAP_S390_RI:
r = -EINVAL;
mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus)) {
r = -EBUSY;
} else if (test_facility(64)) {
set_kvm_facility(kvm->arch.model.fac->mask, 64);
set_kvm_facility(kvm->arch.model.fac->list, 64);
r = 0;
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
r ? "(not available)" : "(success)");
break;
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
@ -375,8 +395,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att
case KVM_S390_VM_MEM_LIMIT_SIZE:
ret = 0;
VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
kvm->arch.gmap->asce_end);
if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
kvm->arch.mem_limit);
if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
ret = -EFAULT;
break;
default:
@ -428,9 +448,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (get_user(new_limit, (u64 __user *)attr->addr))
return -EFAULT;
if (new_limit > kvm->arch.gmap->asce_end)
if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
new_limit > kvm->arch.mem_limit)
return -E2BIG;
if (!new_limit)
return -EINVAL;
/* gmap_alloc takes last usable address */
if (new_limit != KVM_S390_NO_MEM_LIMIT)
new_limit -= 1;
ret = -EBUSY;
mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus) == 0) {
@ -447,7 +475,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
}
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
(void *) kvm->arch.gmap->asce);
break;
}
default:
@ -1024,7 +1054,7 @@ static int kvm_s390_apxa_installed(void)
u8 config[128];
int cc;
if (test_facility(2) && test_facility(12)) {
if (test_facility(12)) {
cc = kvm_s390_query_ap_config(config);
if (cc)
@ -1075,6 +1105,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
return 0;
}
static void sca_dispose(struct kvm *kvm)
{
if (kvm->arch.use_esca)
free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
else
free_page((unsigned long)(kvm->arch.sca));
kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int i, rc;
@ -1098,14 +1137,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
rc = -ENOMEM;
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
kvm->arch.use_esca = 0; /* start with basic SCA */
rwlock_init(&kvm->arch.sca_lock);
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
goto out_err;
spin_lock(&kvm_lock);
sca_offset += 16;
if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
sca_offset = 0;
kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
spin_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
@ -1157,8 +1199,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
} else {
kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_MAX_SIZE;
else
kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
sclp.hamax + 1);
kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
if (!kvm->arch.gmap)
goto out_err;
kvm->arch.gmap->private = kvm;
@ -1170,14 +1218,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.epoch = 0;
spin_lock_init(&kvm->arch.start_stop_lock);
KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
out_err:
kfree(kvm->arch.crypto.crycb);
free_page((unsigned long)kvm->arch.model.fac);
debug_unregister(kvm->arch.dbf);
free_page((unsigned long)(kvm->arch.sca));
sca_dispose(kvm);
KVM_EVENT(3, "creation of vm failed: %d", rc);
return rc;
}
@ -1188,14 +1236,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id,
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
(__u64) vcpu->arch.sie_block)
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
}
smp_mb();
if (!kvm_is_ucontrol(vcpu->kvm))
sca_del_vcpu(vcpu);
if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);
@ -1228,14 +1270,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
free_page((unsigned long)kvm->arch.model.fac);
free_page((unsigned long)(kvm->arch.sca));
sca_dispose(kvm);
debug_unregister(kvm->arch.dbf);
kfree(kvm->arch.crypto.crycb);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
KVM_EVENT(3, "vm 0x%p destroyed", kvm);
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
@ -1249,6 +1291,117 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
sca->cpu[vcpu->vcpu_id].sda = 0;
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
sca->cpu[vcpu->vcpu_id].sda = 0;
}
read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
vcpu->arch.sie_block->ecb2 |= 0x04U;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
d->sda = s->sda;
d->sigp_ctrl.c = s->sigp_ctrl.c;
d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}
static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
int i;
d->ipte_control = s->ipte_control;
d->mcn[0] = s->mcn;
for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
struct bsca_block *old_sca = kvm->arch.sca;
struct esca_block *new_sca;
struct kvm_vcpu *vcpu;
unsigned int vcpu_idx;
u32 scaol, scaoh;
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
if (!new_sca)
return -ENOMEM;
scaoh = (u32)((u64)(new_sca) >> 32);
scaol = (u32)(u64)(new_sca) & ~0x3fU;
kvm_s390_vcpu_block_all(kvm);
write_lock(&kvm->arch.sca_lock);
sca_copy_b_to_e(new_sca, old_sca);
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
vcpu->arch.sie_block->scaoh = scaoh;
vcpu->arch.sie_block->scaol = scaol;
vcpu->arch.sie_block->ecb2 |= 0x04U;
}
kvm->arch.sca = new_sca;
kvm->arch.use_esca = 1;
write_unlock(&kvm->arch.sca_lock);
kvm_s390_vcpu_unblock_all(kvm);
free_page((unsigned long)old_sca);
VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
old_sca, kvm->arch.sca);
return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
int rc;
if (id < KVM_S390_BSCA_CPU_SLOTS)
return true;
if (!sclp.has_esca)
return false;
mutex_lock(&kvm->lock);
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
mutex_unlock(&kvm->lock);
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@ -1259,6 +1412,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
if (test_kvm_facility(vcpu->kvm, 129))
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
@ -1369,8 +1524,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
preempt_enable();
mutex_unlock(&vcpu->kvm->lock);
if (!kvm_is_ucontrol(vcpu->kvm))
if (!kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
sca_add_vcpu(vcpu);
}
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
@ -1439,10 +1597,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->eca |= 1;
if (sclp.has_sigpif)
vcpu->arch.sie_block->eca |= 0x10000000U;
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->arch.sie_block->ecb3 |= 0x01;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= 0x00020000;
vcpu->arch.sie_block->ecd |= 0x20000000;
}
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
if (vcpu->kvm->arch.use_cmma) {
@ -1465,7 +1626,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
struct sie_page *sie_page;
int rc = -EINVAL;
if (id >= KVM_MAX_VCPUS)
if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
goto out;
rc = -ENOMEM;
@ -1482,20 +1643,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
vcpu->arch.sie_block->icpua = id;
if (!kvm_is_ucontrol(kvm)) {
if (!kvm->arch.sca) {
WARN_ON_ONCE(1);
goto out_free_cpu;
}
if (!kvm->arch.sca->cpu[id].sda)
kvm->arch.sca->cpu[id].sda =
(__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh =
(__u32)(((__u64)kvm->arch.sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
}
spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
@ -1509,15 +1656,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
*/
vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
GFP_KERNEL);
if (!vcpu->arch.guest_fpregs.fprs) {
rc = -ENOMEM;
if (!vcpu->arch.guest_fpregs.fprs)
goto out_free_sie_block;
}
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
goto out_free_sie_block;
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
@ -2013,7 +2158,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
*/
kvm_check_async_pf_completion(vcpu);
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
if (need_resched())
schedule();
@ -2071,8 +2217,6 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
int rc = -1;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
@ -2080,40 +2224,36 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
if (guestdbg_enabled(vcpu))
kvm_s390_restore_guest_per_regs(vcpu);
if (exit_reason >= 0) {
rc = 0;
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
if (vcpu->arch.sie_block->icptcode > 0) {
int rc = kvm_handle_sie_intercept(vcpu);
if (rc != -EOPNOTSUPP)
return rc;
vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
return -EREMOTE;
} else if (exit_reason != -EFAULT) {
vcpu->stat.exit_null++;
return 0;
} else if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
vcpu->run->s390_ucontrol.trans_exc_code =
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
rc = -EREMOTE;
return -EREMOTE;
} else if (current->thread.gmap_pfault) {
trace_kvm_s390_major_guest_pfault(vcpu);
current->thread.gmap_pfault = 0;
if (kvm_arch_setup_async_pf(vcpu)) {
rc = 0;
} else {
gpa_t gpa = current->thread.gmap_addr;
rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
}
if (kvm_arch_setup_async_pf(vcpu))
return 0;
return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
}
if (rc == -1)
rc = vcpu_post_run_fault_in_sie(vcpu);
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
if (rc == 0) {
if (kvm_is_ucontrol(vcpu->kvm))
/* Don't exit for host interrupts. */
rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
else
rc = kvm_handle_sie_intercept(vcpu);
}
return rc;
return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
@ -2233,18 +2373,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
if (rc == -EOPNOTSUPP) {
/* intercept cannot be handled in-kernel, prepare kvm-run */
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
rc = 0;
}
if (rc == -EREMOTE) {
/* intercept was handled, but userspace support is needed
* kvm_run has been prepared by the handler */
/* userspace support is needed, kvm_run has been prepared */
rc = 0;
}
@ -2736,6 +2866,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (mem->memory_size & 0xffffful)
return -EINVAL;
if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
return -EINVAL;
return 0;
}
@ -2767,6 +2900,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
static int __init kvm_s390_init(void)
{
if (!sclp.has_sief2) {
pr_info("SIE not available\n");
return -ENODEV;
}
return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

View File

@ -340,4 +340,11 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
/* support for Basic/Extended SCA handling */
static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
{
struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
return &sca->ipte_control;
}
#endif

View File

@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu,
__entry->sie_block = sie_block;
),
TP_printk("create cpu %d at %p, sie block at %p", __entry->id,
__entry->vcpu, __entry->sie_block)
TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
__entry->id, __entry->vcpu, __entry->sie_block)
);
TRACE_EVENT(kvm_s390_destroy_vcpu,
@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css,
__entry->kvm = kvm;
),
TP_printk("enabling channel I/O support (kvm @ %p)\n",
TP_printk("enabling channel I/O support (kvm @ %pK)\n",
__entry->kvm)
);

View File

@ -133,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
/**
* gmap_alloc - allocate a guest address space
* @mm: pointer to the parent mm_struct
* @limit: maximum size of the gmap address space
* @limit: maximum address of the gmap address space
*
* Returns a guest address space structure.
*/
@ -402,7 +402,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
if (len == 0 || from + len < from || to + len < to ||
from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
return -EINVAL;
flush = 0;

View File

@ -25,6 +25,7 @@
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
#include <linux/hyperv.h>
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
@ -45,6 +46,31 @@
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
/* x86-specific vcpu->requests bit members */
#define KVM_REQ_MIGRATE_TIMER 8
#define KVM_REQ_REPORT_TPR_ACCESS 9
#define KVM_REQ_TRIPLE_FAULT 10
#define KVM_REQ_MMU_SYNC 11
#define KVM_REQ_CLOCK_UPDATE 12
#define KVM_REQ_DEACTIVATE_FPU 13
#define KVM_REQ_EVENT 14
#define KVM_REQ_APF_HALT 15
#define KVM_REQ_STEAL_UPDATE 16
#define KVM_REQ_NMI 17
#define KVM_REQ_PMU 18
#define KVM_REQ_PMI 19
#define KVM_REQ_SMI 20
#define KVM_REQ_MASTERCLOCK_UPDATE 21
#define KVM_REQ_MCLOCK_INPROGRESS 22
#define KVM_REQ_SCAN_IOAPIC 23
#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
#define KVM_REQ_APIC_PAGE_RELOAD 25
#define KVM_REQ_HV_CRASH 26
#define KVM_REQ_IOAPIC_EOI_EXIT 27
#define KVM_REQ_HV_RESET 28
#define KVM_REQ_HV_EXIT 29
#define KVM_REQ_HV_STIMER 30
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
@ -213,6 +239,10 @@ union kvm_mmu_page_role {
};
};
struct kvm_rmap_head {
unsigned long val;
};
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
@ -230,7 +260,7 @@ struct kvm_mmu_page {
bool unsync;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
unsigned long parent_ptes; /* Reverse mapping for parent_pte */
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
/* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */
unsigned long mmu_valid_gen;
@ -374,10 +404,38 @@ struct kvm_mtrr {
struct list_head head;
};
/* Hyper-V SynIC timer */
struct kvm_vcpu_hv_stimer {
struct hrtimer timer;
int index;
u64 config;
u64 count;
u64 exp_time;
struct hv_message msg;
bool msg_pending;
};
/* Hyper-V synthetic interrupt controller (SynIC)*/
struct kvm_vcpu_hv_synic {
u64 version;
u64 control;
u64 msg_page;
u64 evt_page;
atomic64_t sint[HV_SYNIC_SINT_COUNT];
atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT];
DECLARE_BITMAP(auto_eoi_bitmap, 256);
DECLARE_BITMAP(vec_bitmap, 256);
bool active;
};
/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
u64 hv_vapic;
s64 runtime_offset;
struct kvm_vcpu_hv_synic synic;
struct kvm_hyperv_exit exit;
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
};
struct kvm_vcpu_arch {
@ -400,7 +458,8 @@ struct kvm_vcpu_arch {
u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
u64 eoi_exit_bitmap[4];
bool apicv_active;
DECLARE_BITMAP(ioapic_handled_vectors, 256);
unsigned long apic_attention;
int32_t apic_arb_prio;
int mp_state;
@ -589,7 +648,7 @@ struct kvm_lpage_info {
};
struct kvm_arch_memory_slot {
unsigned long *rmap[KVM_NR_PAGE_SIZES];
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
@ -831,10 +890,11 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu);
bool (*get_enable_apicv)(void);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
@ -1086,6 +1146,8 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
@ -1231,6 +1293,9 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,

View File

@ -269,4 +269,96 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
#define HV_SYNIC_STIMER_COUNT (4)
/* Define synthetic interrupt controller message constants. */
#define HV_MESSAGE_SIZE (256)
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
/* Define hypervisor message types. */
enum hv_message_type {
HVMSG_NONE = 0x00000000,
/* Memory access messages. */
HVMSG_UNMAPPED_GPA = 0x80000000,
HVMSG_GPA_INTERCEPT = 0x80000001,
/* Timer notification messages. */
HVMSG_TIMER_EXPIRED = 0x80000010,
/* Error messages. */
HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
/* Trace buffer complete messages. */
HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
/* Platform-specific processor intercept messages. */
HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
HVMSG_X64_MSR_INTERCEPT = 0x80010001,
HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
HVMSG_X64_APIC_EOI = 0x80010004,
HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
};
/* Define synthetic interrupt controller message flags. */
union hv_message_flags {
__u8 asu8;
struct {
__u8 msg_pending:1;
__u8 reserved:7;
};
};
/* Define port identifier type. */
union hv_port_id {
__u32 asu32;
struct {
__u32 id:24;
__u32 reserved:8;
} u;
};
/* Define synthetic interrupt controller message header. */
struct hv_message_header {
__u32 message_type;
__u8 payload_size;
union hv_message_flags message_flags;
__u8 reserved[2];
union {
__u64 sender;
union hv_port_id port;
};
};
/* Define synthetic interrupt controller message format. */
struct hv_message {
struct hv_message_header header;
union {
__u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
} u;
};
/* Define the synthetic interrupt message page layout. */
struct hv_message_page {
struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
};
/* Define timer message payload structure. */
struct hv_timer_message_payload {
__u32 timer_index;
__u32 reserved;
__u64 expiration_time; /* When the timer expired */
__u64 delivery_time; /* When the message was delivered */
};
#define HV_STIMER_ENABLE (1ULL << 0)
#define HV_STIMER_PERIODIC (1ULL << 1)
#define HV_STIMER_LAZY (1ULL << 2)
#define HV_STIMER_AUTOENABLE (1ULL << 3)
#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F)
#endif

View File

@ -23,13 +23,665 @@
#include "x86.h"
#include "lapic.h"
#include "ioapic.h"
#include "hyperv.h"
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <asm/apicdef.h>
#include <trace/events/kvm.h>
#include "trace.h"
static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
return atomic64_read(&synic->sint[sint]);
}
static inline int synic_get_sint_vector(u64 sint_value)
{
if (sint_value & HV_SYNIC_SINT_MASKED)
return -1;
return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
}
static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
int vector)
{
int i;
for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
return true;
}
return false;
}
static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
int vector)
{
int i;
u64 sint_value;
for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
sint_value = synic_read_sint(synic, i);
if (synic_get_sint_vector(sint_value) == vector &&
sint_value & HV_SYNIC_SINT_AUTO_EOI)
return true;
}
return false;
}
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
u64 data, bool host)
{
int vector;
vector = data & HV_SYNIC_SINT_VECTOR_MASK;
if (vector < 16 && !host)
return 1;
/*
* Guest may configure multiple SINTs to use the same vector, so
* we maintain a bitmap of vectors handled by synic, and a
* bitmap of vectors with auto-eoi behavior. The bitmaps are
* updated here, and atomically queried on fast paths.
*/
atomic64_set(&synic->sint[sint], data);
if (synic_has_vector_connected(synic, vector))
__set_bit(vector, synic->vec_bitmap);
else
__clear_bit(vector, synic->vec_bitmap);
if (synic_has_vector_auto_eoi(synic, vector))
__set_bit(vector, synic->auto_eoi_bitmap);
else
__clear_bit(vector, synic->auto_eoi_bitmap);
/* Load SynIC vectors into EOI exit bitmap */
kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
return 0;
}
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
{
struct kvm_vcpu *vcpu;
struct kvm_vcpu_hv_synic *synic;
if (vcpu_id >= atomic_read(&kvm->online_vcpus))
return NULL;
vcpu = kvm_get_vcpu(kvm, vcpu_id);
if (!vcpu)
return NULL;
synic = vcpu_to_synic(vcpu);
return (synic->active) ? synic : NULL;
}
static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
u32 sint)
{
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
struct page *page;
gpa_t gpa;
struct hv_message *msg;
struct hv_message_page *msg_page;
gpa = synic->msg_page & PAGE_MASK;
page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
if (is_error_page(page)) {
vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
gpa);
return;
}
msg_page = kmap_atomic(page);
msg = &msg_page->sint_message[sint];
msg->header.message_flags.msg_pending = 0;
kunmap_atomic(msg_page);
kvm_release_page_dirty(page);
kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
}
static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
struct kvm_vcpu_hv_stimer *stimer;
int gsi, idx, stimers_pending;
trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
synic_clear_sint_msg_pending(synic, sint);
/* Try to deliver pending Hyper-V SynIC timers messages */
stimers_pending = 0;
for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
stimer = &hv_vcpu->stimer[idx];
if (stimer->msg_pending &&
(stimer->config & HV_STIMER_ENABLE) &&
HV_STIMER_SINT(stimer->config) == sint) {
set_bit(stimer->index,
hv_vcpu->stimer_pending_bitmap);
stimers_pending++;
}
}
if (stimers_pending)
kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
idx = srcu_read_lock(&kvm->irq_srcu);
gsi = atomic_read(&synic->sint_to_gsi[sint]);
if (gsi != -1)
kvm_notify_acked_gsi(kvm, gsi);
srcu_read_unlock(&kvm->irq_srcu, idx);
}
static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
{
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
hv_vcpu->exit.u.synic.msr = msr;
hv_vcpu->exit.u.synic.control = synic->control;
hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}
static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
u32 msr, u64 data, bool host)
{
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
int ret;
if (!synic->active)
return 1;
trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
ret = 0;
switch (msr) {
case HV_X64_MSR_SCONTROL:
synic->control = data;
if (!host)
synic_exit(synic, msr);
break;
case HV_X64_MSR_SVERSION:
if (!host) {
ret = 1;
break;
}
synic->version = data;
break;
case HV_X64_MSR_SIEFP:
if (data & HV_SYNIC_SIEFP_ENABLE)
if (kvm_clear_guest(vcpu->kvm,
data & PAGE_MASK, PAGE_SIZE)) {
ret = 1;
break;
}
synic->evt_page = data;
if (!host)
synic_exit(synic, msr);
break;
case HV_X64_MSR_SIMP:
if (data & HV_SYNIC_SIMP_ENABLE)
if (kvm_clear_guest(vcpu->kvm,
data & PAGE_MASK, PAGE_SIZE)) {
ret = 1;
break;
}
synic->msg_page = data;
if (!host)
synic_exit(synic, msr);
break;
case HV_X64_MSR_EOM: {
int i;
for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
kvm_hv_notify_acked_sint(vcpu, i);
break;
}
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
break;
default:
ret = 1;
break;
}
return ret;
}
static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
{
int ret;
if (!synic->active)
return 1;
ret = 0;
switch (msr) {
case HV_X64_MSR_SCONTROL:
*pdata = synic->control;
break;
case HV_X64_MSR_SVERSION:
*pdata = synic->version;
break;
case HV_X64_MSR_SIEFP:
*pdata = synic->evt_page;
break;
case HV_X64_MSR_SIMP:
*pdata = synic->msg_page;
break;
case HV_X64_MSR_EOM:
*pdata = 0;
break;
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
break;
default:
ret = 1;
break;
}
return ret;
}
int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
struct kvm_lapic_irq irq;
int ret, vector;
if (sint >= ARRAY_SIZE(synic->sint))
return -EINVAL;
vector = synic_get_sint_vector(synic_read_sint(synic, sint));
if (vector < 0)
return -ENOENT;
memset(&irq, 0, sizeof(irq));
irq.dest_id = kvm_apic_id(vcpu->arch.apic);
irq.dest_mode = APIC_DEST_PHYSICAL;
irq.delivery_mode = APIC_DM_FIXED;
irq.vector = vector;
irq.level = 1;
ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL);
trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
return ret;
}
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
{
struct kvm_vcpu_hv_synic *synic;
synic = synic_get(kvm, vcpu_id);
if (!synic)
return -EINVAL;
return synic_set_irq(synic, sint);
}
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
{
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
int i;
trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
kvm_hv_notify_acked_sint(vcpu, i);
}
static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
{
struct kvm_vcpu_hv_synic *synic;
synic = synic_get(kvm, vcpu_id);
if (!synic)
return -EINVAL;
if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
return -EINVAL;
atomic_set(&synic->sint_to_gsi[sint], gsi);
return 0;
}
void kvm_hv_irq_routing_update(struct kvm *kvm)
{
struct kvm_irq_routing_table *irq_rt;
struct kvm_kernel_irq_routing_entry *e;
u32 gsi;
irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
lockdep_is_held(&kvm->irq_lock));
for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
if (e->type == KVM_IRQ_ROUTING_HV_SINT)
kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
e->hv_sint.sint, gsi);
}
}
}
static void synic_init(struct kvm_vcpu_hv_synic *synic)
{
int i;
memset(synic, 0, sizeof(*synic));
synic->version = HV_SYNIC_VERSION_1;
for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
atomic_set(&synic->sint_to_gsi[i], -1);
}
}
static u64 get_time_ref_counter(struct kvm *kvm)
{
return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
}
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
bool vcpu_kick)
{
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
set_bit(stimer->index,
vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
if (vcpu_kick)
kvm_vcpu_kick(vcpu);
}
static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
{
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
stimer->index);
hrtimer_cancel(&stimer->timer);
clear_bit(stimer->index,
vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
stimer->msg_pending = false;
stimer->exp_time = 0;
}
static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
{
struct kvm_vcpu_hv_stimer *stimer;
stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
stimer->index);
stimer_mark_pending(stimer, true);
return HRTIMER_NORESTART;
}
/*
* stimer_start() assumptions:
* a) stimer->count is not equal to 0
* b) stimer->config has HV_STIMER_ENABLE flag
*/
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
{
u64 time_now;
ktime_t ktime_now;
time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
ktime_now = ktime_get();
if (stimer->config & HV_STIMER_PERIODIC) {
if (stimer->exp_time) {
if (time_now >= stimer->exp_time) {
u64 remainder;
div64_u64_rem(time_now - stimer->exp_time,
stimer->count, &remainder);
stimer->exp_time =
time_now + (stimer->count - remainder);
}
} else
stimer->exp_time = time_now + stimer->count;
trace_kvm_hv_stimer_start_periodic(
stimer_to_vcpu(stimer)->vcpu_id,
stimer->index,
time_now, stimer->exp_time);
hrtimer_start(&stimer->timer,
ktime_add_ns(ktime_now,
100 * (stimer->exp_time - time_now)),
HRTIMER_MODE_ABS);
return 0;
}
stimer->exp_time = stimer->count;
if (time_now >= stimer->count) {
/*
* Expire timer according to Hypervisor Top-Level Functional
* specification v4(15.3.1):
* "If a one shot is enabled and the specified count is in
* the past, it will expire immediately."
*/
stimer_mark_pending(stimer, false);
return 0;
}
trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
stimer->index,
time_now, stimer->count);
hrtimer_start(&stimer->timer,
ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
HRTIMER_MODE_ABS);
return 0;
}
static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
bool host)
{
trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
stimer->index, config, host);
stimer_cleanup(stimer);
if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
config &= ~HV_STIMER_ENABLE;
stimer->config = config;
stimer_mark_pending(stimer, false);
return 0;
}
static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
bool host)
{
trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
stimer->index, count, host);
stimer_cleanup(stimer);
stimer->count = count;
if (stimer->count == 0)
stimer->config &= ~HV_STIMER_ENABLE;
else if (stimer->config & HV_STIMER_AUTOENABLE)
stimer->config |= HV_STIMER_ENABLE;
stimer_mark_pending(stimer, false);
return 0;
}
static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
*pconfig = stimer->config;
return 0;
}
static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
*pcount = stimer->count;
return 0;
}
static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
struct hv_message *src_msg)
{
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
struct page *page;
gpa_t gpa;
struct hv_message *dst_msg;
int r;
struct hv_message_page *msg_page;
if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
return -ENOENT;
gpa = synic->msg_page & PAGE_MASK;
page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
if (is_error_page(page))
return -EFAULT;
msg_page = kmap_atomic(page);
dst_msg = &msg_page->sint_message[sint];
if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
src_msg->header.message_type) != HVMSG_NONE) {
dst_msg->header.message_flags.msg_pending = 1;
r = -EAGAIN;
} else {
memcpy(&dst_msg->u.payload, &src_msg->u.payload,
src_msg->header.payload_size);
dst_msg->header.message_type = src_msg->header.message_type;
dst_msg->header.payload_size = src_msg->header.payload_size;
r = synic_set_irq(synic, sint);
if (r >= 1)
r = 0;
else if (r == 0)
r = -EFAULT;
}
kunmap_atomic(msg_page);
kvm_release_page_dirty(page);
kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
return r;
}
static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
{
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
struct hv_message *msg = &stimer->msg;
struct hv_timer_message_payload *payload =
(struct hv_timer_message_payload *)&msg->u.payload;
payload->expiration_time = stimer->exp_time;
payload->delivery_time = get_time_ref_counter(vcpu->kvm);
return synic_deliver_msg(vcpu_to_synic(vcpu),
HV_STIMER_SINT(stimer->config), msg);
}
static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
{
int r;
stimer->msg_pending = true;
r = stimer_send_msg(stimer);
trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
stimer->index, r);
if (!r) {
stimer->msg_pending = false;
if (!(stimer->config & HV_STIMER_PERIODIC))
stimer->config &= ~HV_STIMER_ENABLE;
}
}
void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
struct kvm_vcpu_hv_stimer *stimer;
u64 time_now, exp_time;
int i;
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
stimer = &hv_vcpu->stimer[i];
if (stimer->config & HV_STIMER_ENABLE) {
exp_time = stimer->exp_time;
if (exp_time) {
time_now =
get_time_ref_counter(vcpu->kvm);
if (time_now >= exp_time)
stimer_expiration(stimer);
}
if ((stimer->config & HV_STIMER_ENABLE) &&
stimer->count)
stimer_start(stimer);
else
stimer_cleanup(stimer);
}
}
}
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
int i;
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
stimer_cleanup(&hv_vcpu->stimer[i]);
}
static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
struct hv_message *msg = &stimer->msg;
struct hv_timer_message_payload *payload =
(struct hv_timer_message_payload *)&msg->u.payload;
memset(&msg->header, 0, sizeof(msg->header));
msg->header.message_type = HVMSG_TIMER_EXPIRED;
msg->header.payload_size = sizeof(*payload);
payload->timer_index = stimer->index;
payload->expiration_time = 0;
payload->delivery_time = 0;
}
static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
{
memset(stimer, 0, sizeof(*stimer));
stimer->index = timer_index;
hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
stimer->timer.function = stimer_timer_callback;
stimer_prepare_msg(stimer);
}
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
int i;
synic_init(&hv_vcpu->synic);
bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
stimer_init(&hv_vcpu->stimer[i], i);
}
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
{
/*
* Hyper-V SynIC auto EOI SINT's are
* not compatible with APICV, so deactivate APICV
*/
kvm_vcpu_deactivate_apicv(vcpu);
vcpu_to_synic(vcpu)->active = true;
return 0;
}
static bool kvm_hv_msr_partition_wide(u32 msr)
{
bool r = false;
@ -226,6 +878,31 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
hv->runtime_offset = data - current_task_runtime_100ns();
break;
case HV_X64_MSR_SCONTROL:
case HV_X64_MSR_SVERSION:
case HV_X64_MSR_SIEFP:
case HV_X64_MSR_SIMP:
case HV_X64_MSR_EOM:
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
case HV_X64_MSR_STIMER0_CONFIG:
case HV_X64_MSR_STIMER1_CONFIG:
case HV_X64_MSR_STIMER2_CONFIG:
case HV_X64_MSR_STIMER3_CONFIG: {
int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
data, host);
}
case HV_X64_MSR_STIMER0_COUNT:
case HV_X64_MSR_STIMER1_COUNT:
case HV_X64_MSR_STIMER2_COUNT:
case HV_X64_MSR_STIMER3_COUNT: {
int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
data, host);
}
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@ -248,11 +925,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_HYPERCALL:
data = hv->hv_hypercall;
break;
case HV_X64_MSR_TIME_REF_COUNT: {
data =
div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
case HV_X64_MSR_TIME_REF_COUNT:
data = get_time_ref_counter(kvm);
break;
}
case HV_X64_MSR_REFERENCE_TSC:
data = hv->hv_tsc_page;
break;
@ -304,6 +979,31 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_VP_RUNTIME:
data = current_task_runtime_100ns() + hv->runtime_offset;
break;
case HV_X64_MSR_SCONTROL:
case HV_X64_MSR_SVERSION:
case HV_X64_MSR_SIEFP:
case HV_X64_MSR_SIMP:
case HV_X64_MSR_EOM:
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
case HV_X64_MSR_STIMER0_CONFIG:
case HV_X64_MSR_STIMER1_CONFIG:
case HV_X64_MSR_STIMER2_CONFIG:
case HV_X64_MSR_STIMER3_CONFIG: {
int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
return stimer_get_config(vcpu_to_stimer(vcpu, timer_index),
pdata);
}
case HV_X64_MSR_STIMER0_COUNT:
case HV_X64_MSR_STIMER1_COUNT:
case HV_X64_MSR_STIMER2_COUNT:
case HV_X64_MSR_STIMER3_COUNT: {
int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
pdata);
}
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;

View File

@ -24,9 +24,64 @@
#ifndef __ARCH_X86_KVM_HYPERV_H__
#define __ARCH_X86_KVM_HYPERV_H__
static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu)
{
return &vcpu->arch.hyperv;
}
static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu)
{
struct kvm_vcpu_arch *arch;
arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv);
return container_of(arch, struct kvm_vcpu, arch);
}
static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu)
{
return &vcpu->arch.hyperv.synic;
}
static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
{
return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic));
}
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_hv_hypercall_enabled(struct kvm *kvm);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
void kvm_hv_irq_routing_update(struct kvm *kvm);
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
int timer_index)
{
return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index];
}
static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer)
{
struct kvm_vcpu_hv *hv_vcpu;
hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv,
stimer[0]);
return hv_vcpu_to_vcpu(hv_vcpu);
}
static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
{
return !bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap,
HV_SYNIC_STIMER_COUNT);
}
void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
#endif

View File

@ -233,7 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
}
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
{
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
union kvm_ioapic_redirect_entry *e;
@ -250,7 +250,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
(e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
kvm_apic_pending_eoi(vcpu, e->fields.vector)))
__set_bit(e->fields.vector,
(unsigned long *)eoi_exit_bitmap);
ioapic_handled_vectors);
}
}
spin_unlock(&ioapic->lock);

View File

@ -121,7 +121,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, unsigned long *dest_map);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors);
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors);
#endif

View File

@ -76,7 +76,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
if (kvm_cpu_has_extint(v))
return 1;
if (kvm_vcpu_apic_vid_enabled(v))
if (kvm_vcpu_apicv_active(v))
return 0;
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */

View File

@ -33,6 +33,8 @@
#include "lapic.h"
#include "hyperv.h"
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
@ -219,6 +221,16 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
srcu_read_unlock(&kvm->irq_srcu, idx);
}
static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
if (!level)
return -1;
return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
}
int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
@ -257,6 +269,11 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data;
break;
case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint;
e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
e->hv_sint.sint = ue->u.hv_sint.sint;
break;
default:
goto out;
}
@ -332,14 +349,15 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
}
void kvm_arch_irq_routing_update(struct kvm *kvm)
void kvm_arch_post_irq_routing_update(struct kvm *kvm)
{
if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
return;
kvm_make_scan_ioapic_request(kvm);
}
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_kernel_irq_routing_entry *entry;
@ -369,9 +387,26 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
u32 vector = entry->msi.data & 0xff;
__set_bit(vector,
(unsigned long *) eoi_exit_bitmap);
ioapic_handled_vectors);
}
}
}
srcu_read_unlock(&kvm->irq_srcu, idx);
}
int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
int irq_source_id, int level, bool line_status)
{
switch (irq->type) {
case KVM_IRQ_ROUTING_HV_SINT:
return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
line_status);
default:
return -EWOULDBLOCK;
}
}
void kvm_arch_irq_routing_update(struct kvm *kvm)
{
kvm_hv_irq_routing_update(kvm);
}

View File

@ -41,6 +41,7 @@
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@ -128,11 +129,6 @@ static inline int apic_enabled(struct kvm_lapic *apic)
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
static inline int kvm_apic_id(struct kvm_lapic *apic)
{
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
/* The logical map is definitely wrong if we have multiple
* modes at the same time. (Physical map is always right.)
*/
@ -379,7 +375,8 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
if (!apic->irr_pending)
return -1;
kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
if (apic->vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
result = apic_search_irr(apic);
ASSERT(result == -1 || result >= 16);
@ -392,7 +389,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
vcpu = apic->vcpu;
if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) {
if (unlikely(vcpu->arch.apicv_active)) {
/* try to update RVI */
apic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_make_request(KVM_REQ_EVENT, vcpu);
@ -418,7 +415,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
* because the processor can modify ISR under the hood. Instead
* just set SVI.
*/
if (unlikely(kvm_x86_ops->hwapic_isr_update))
if (unlikely(vcpu->arch.apicv_active))
kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
else {
++apic->isr_count;
@ -466,7 +463,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
* on the other hand isr_count and highest_isr_cache are unused
* and must be left alone.
*/
if (unlikely(kvm_x86_ops->hwapic_isr_update))
if (unlikely(vcpu->arch.apicv_active))
kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
apic_find_highest_isr(apic));
else {
@ -852,7 +849,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic_clear_vector(vector, apic->regs + APIC_TMR);
}
if (kvm_x86_ops->deliver_posted_interrupt)
if (vcpu->arch.apicv_active)
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
else {
apic_set_irr(vector, apic);
@ -932,7 +929,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap);
return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
}
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
@ -974,6 +971,9 @@ static int apic_set_eoi(struct kvm_lapic *apic)
apic_clear_isr(vector, apic);
apic_update_ppr(apic);
if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
kvm_hv_synic_send_eoi(apic->vcpu, vector);
kvm_ioapic_send_eoi(apic, vector);
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
return vector;
@ -1225,7 +1225,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
int vec = reg & APIC_VECTOR_MASK;
void *bitmap = apic->regs + APIC_ISR;
if (kvm_x86_ops->deliver_posted_interrupt)
if (vcpu->arch.apicv_active)
bitmap = apic->regs + APIC_IRR;
if (apic_test_vector(vec, bitmap))
@ -1693,8 +1693,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu);
apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
apic->irr_pending = vcpu->arch.apicv_active;
apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
apic->highest_isr_cache = -1;
update_divide_count(apic);
atomic_set(&apic->lapic_timer.pending, 0);
@ -1883,6 +1883,12 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
apic_set_isr(vector, apic);
apic_update_ppr(apic);
apic_clear_irr(vector, apic);
if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
apic_clear_isr(vector, apic);
apic_update_ppr(apic);
}
return vector;
}
@ -1906,15 +1912,15 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
update_divide_count(apic);
start_apic_timer(apic);
apic->irr_pending = true;
apic->isr_count = kvm_x86_ops->hwapic_isr_update ?
apic->isr_count = vcpu->arch.apicv_active ?
1 : count_vectors(apic->regs + APIC_ISR);
apic->highest_isr_cache = -1;
if (kvm_x86_ops->hwapic_irr_update)
if (vcpu->arch.apicv_active) {
kvm_x86_ops->hwapic_irr_update(vcpu,
apic_find_highest_irr(apic));
if (unlikely(kvm_x86_ops->hwapic_isr_update))
kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
apic_find_highest_isr(apic));
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_rtc_eoi_tracking_restore_one(vcpu);

View File

@ -143,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}
static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu)
static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu)
{
return kvm_x86_ops->cpu_uses_apicv(vcpu);
return vcpu->arch.apic && vcpu->arch.apicv_active;
}
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
@ -164,6 +164,11 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
static inline int kvm_apic_id(struct kvm_lapic *apic)
{
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
void wait_lapic_expire(struct kvm_vcpu *vcpu);

View File

@ -311,11 +311,6 @@ static int is_large_pte(u64 pte)
return pte & PT_PAGE_SIZE_MASK;
}
static int is_rmap_spte(u64 pte)
{
return is_shadow_present_pte(pte);
}
static int is_last_spte(u64 pte, int level)
{
if (level == PT_PAGE_TABLE_LEVEL)
@ -540,7 +535,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
u64 old_spte = *sptep;
bool ret = false;
WARN_ON(!is_rmap_spte(new_spte));
WARN_ON(!is_shadow_present_pte(new_spte));
if (!is_shadow_present_pte(old_spte)) {
mmu_spte_set(sptep, new_spte);
@ -595,7 +590,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
else
old_spte = __update_clear_spte_slow(sptep, 0ull);
if (!is_rmap_spte(old_spte))
if (!is_shadow_present_pte(old_spte))
return 0;
pfn = spte_to_pfn(old_spte);
@ -909,36 +904,35 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
}
/*
* Pte mapping structures:
* About rmap_head encoding:
*
* If pte_list bit zero is zero, then pte_list point to the spte.
*
* If pte_list bit zero is one, (then pte_list & ~1) points to a struct
* If the bit zero of rmap_head->val is clear, then it points to the only spte
* in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
* pte_list_desc containing more mappings.
*
* Returns the number of pte entries before the spte was added or zero if
* the spte was not added.
*
*/
/*
* Returns the number of pointers in the rmap chain, not counting the new one.
*/
static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
unsigned long *pte_list)
struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
int i, count = 0;
if (!*pte_list) {
if (!rmap_head->val) {
rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte);
*pte_list = (unsigned long)spte;
} else if (!(*pte_list & 1)) {
rmap_head->val = (unsigned long)spte;
} else if (!(rmap_head->val & 1)) {
rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_pte_list_desc(vcpu);
desc->sptes[0] = (u64 *)*pte_list;
desc->sptes[0] = (u64 *)rmap_head->val;
desc->sptes[1] = spte;
*pte_list = (unsigned long)desc | 1;
rmap_head->val = (unsigned long)desc | 1;
++count;
} else {
rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
desc = (struct pte_list_desc *)(*pte_list & ~1ul);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
desc = desc->more;
count += PTE_LIST_EXT;
@ -955,8 +949,9 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
}
static void
pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
int i, struct pte_list_desc *prev_desc)
pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
struct pte_list_desc *desc, int i,
struct pte_list_desc *prev_desc)
{
int j;
@ -967,43 +962,43 @@ pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
if (j != 0)
return;
if (!prev_desc && !desc->more)
*pte_list = (unsigned long)desc->sptes[0];
rmap_head->val = (unsigned long)desc->sptes[0];
else
if (prev_desc)
prev_desc->more = desc->more;
else
*pte_list = (unsigned long)desc->more | 1;
rmap_head->val = (unsigned long)desc->more | 1;
mmu_free_pte_list_desc(desc);
}
static void pte_list_remove(u64 *spte, unsigned long *pte_list)
static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
struct pte_list_desc *prev_desc;
int i;
if (!*pte_list) {
if (!rmap_head->val) {
printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
BUG();
} else if (!(*pte_list & 1)) {
} else if (!(rmap_head->val & 1)) {
rmap_printk("pte_list_remove: %p 1->0\n", spte);
if ((u64 *)*pte_list != spte) {
if ((u64 *)rmap_head->val != spte) {
printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte);
BUG();
}
*pte_list = 0;
rmap_head->val = 0;
} else {
rmap_printk("pte_list_remove: %p many->many\n", spte);
desc = (struct pte_list_desc *)(*pte_list & ~1ul);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
prev_desc = NULL;
while (desc) {
for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
if (desc->sptes[i] == spte) {
pte_list_desc_remove_entry(pte_list,
desc, i,
prev_desc);
pte_list_desc_remove_entry(rmap_head,
desc, i, prev_desc);
return;
}
}
prev_desc = desc;
desc = desc->more;
}
@ -1012,28 +1007,8 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list)
}
}
typedef void (*pte_list_walk_fn) (u64 *spte);
static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
{
struct pte_list_desc *desc;
int i;
if (!*pte_list)
return;
if (!(*pte_list & 1))
return fn((u64 *)*pte_list);
desc = (struct pte_list_desc *)(*pte_list & ~1ul);
while (desc) {
for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
fn(desc->sptes[i]);
desc = desc->more;
}
}
static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
unsigned long idx;
@ -1041,10 +1016,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
}
/*
* Take gfn and return the reverse mapping to it.
*/
static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp)
static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
struct kvm_mmu_page *sp)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
@ -1065,24 +1038,24 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu)
static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{
struct kvm_mmu_page *sp;
unsigned long *rmapp;
struct kvm_rmap_head *rmap_head;
sp = page_header(__pa(spte));
kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
return pte_list_add(vcpu, spte, rmapp);
rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
return pte_list_add(vcpu, spte, rmap_head);
}
static void rmap_remove(struct kvm *kvm, u64 *spte)
{
struct kvm_mmu_page *sp;
gfn_t gfn;
unsigned long *rmapp;
struct kvm_rmap_head *rmap_head;
sp = page_header(__pa(spte));
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
rmapp = gfn_to_rmap(kvm, gfn, sp);
pte_list_remove(spte, rmapp);
rmap_head = gfn_to_rmap(kvm, gfn, sp);
pte_list_remove(spte, rmap_head);
}
/*
@ -1102,19 +1075,26 @@ struct rmap_iterator {
*
* Returns sptep if found, NULL otherwise.
*/
static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
struct rmap_iterator *iter)
{
if (!rmap)
u64 *sptep;
if (!rmap_head->val)
return NULL;
if (!(rmap & 1)) {
if (!(rmap_head->val & 1)) {
iter->desc = NULL;
return (u64 *)rmap;
sptep = (u64 *)rmap_head->val;
goto out;
}
iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
iter->pos = 0;
return iter->desc->sptes[iter->pos];
sptep = iter->desc->sptes[iter->pos];
out:
BUG_ON(!is_shadow_present_pte(*sptep));
return sptep;
}
/*
@ -1124,14 +1104,14 @@ static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
*/
static u64 *rmap_get_next(struct rmap_iterator *iter)
{
u64 *sptep;
if (iter->desc) {
if (iter->pos < PTE_LIST_EXT - 1) {
u64 *sptep;
++iter->pos;
sptep = iter->desc->sptes[iter->pos];
if (sptep)
return sptep;
goto out;
}
iter->desc = iter->desc->more;
@ -1139,17 +1119,20 @@ static u64 *rmap_get_next(struct rmap_iterator *iter)
if (iter->desc) {
iter->pos = 0;
/* desc->sptes[0] cannot be NULL */
return iter->desc->sptes[iter->pos];
sptep = iter->desc->sptes[iter->pos];
goto out;
}
}
return NULL;
out:
BUG_ON(!is_shadow_present_pte(*sptep));
return sptep;
}
#define for_each_rmap_spte(_rmap_, _iter_, _spte_) \
for (_spte_ = rmap_get_first(*_rmap_, _iter_); \
_spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \
_spte_ = rmap_get_next(_iter_))
#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \
for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \
_spte_; _spte_ = rmap_get_next(_iter_))
static void drop_spte(struct kvm *kvm, u64 *sptep)
{
@ -1207,14 +1190,15 @@ static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
return mmu_spte_update(sptep, spte);
}
static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
static bool __rmap_write_protect(struct kvm *kvm,
struct kvm_rmap_head *rmap_head,
bool pt_protect)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;
for_each_rmap_spte(rmapp, &iter, sptep)
for_each_rmap_spte(rmap_head, &iter, sptep)
flush |= spte_write_protect(kvm, sptep, pt_protect);
return flush;
@ -1231,13 +1215,13 @@ static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep)
return mmu_spte_update(sptep, spte);
}
static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;
for_each_rmap_spte(rmapp, &iter, sptep)
for_each_rmap_spte(rmap_head, &iter, sptep)
flush |= spte_clear_dirty(kvm, sptep);
return flush;
@ -1254,13 +1238,13 @@ static bool spte_set_dirty(struct kvm *kvm, u64 *sptep)
return mmu_spte_update(sptep, spte);
}
static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp)
static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;
for_each_rmap_spte(rmapp, &iter, sptep)
for_each_rmap_spte(rmap_head, &iter, sptep)
flush |= spte_set_dirty(kvm, sptep);
return flush;
@ -1280,12 +1264,12 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
unsigned long *rmapp;
struct kvm_rmap_head *rmap_head;
while (mask) {
rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
PT_PAGE_TABLE_LEVEL, slot);
__rmap_write_protect(kvm, rmapp, false);
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
PT_PAGE_TABLE_LEVEL, slot);
__rmap_write_protect(kvm, rmap_head, false);
/* clear the first set bit */
mask &= mask - 1;
@ -1305,12 +1289,12 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
unsigned long *rmapp;
struct kvm_rmap_head *rmap_head;
while (mask) {
rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
PT_PAGE_TABLE_LEVEL, slot);
__rmap_clear_dirty(kvm, rmapp);
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
PT_PAGE_TABLE_LEVEL, slot);
__rmap_clear_dirty(kvm, rmap_head);
/* clear the first set bit */
mask &= mask - 1;
@ -1342,28 +1326,27 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
struct kvm_memory_slot *slot;
unsigned long *rmapp;
struct kvm_rmap_head *rmap_head;
int i;
bool write_protected = false;
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
rmapp = __gfn_to_rmap(gfn, i, slot);
write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true);
rmap_head = __gfn_to_rmap(gfn, i, slot);
write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true);
}
return write_protected;
}
static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp)
static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
struct rmap_iterator iter;
bool flush = false;
while ((sptep = rmap_get_first(*rmapp, &iter))) {
BUG_ON(!(*sptep & PT_PRESENT_MASK));
while ((sptep = rmap_get_first(rmap_head, &iter))) {
rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
drop_spte(kvm, sptep);
@ -1373,14 +1356,14 @@ static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp)
return flush;
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
unsigned long data)
{
return kvm_zap_rmapp(kvm, rmapp);
return kvm_zap_rmapp(kvm, rmap_head);
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
unsigned long data)
{
@ -1395,7 +1378,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
new_pfn = pte_pfn(*ptep);
restart:
for_each_rmap_spte(rmapp, &iter, sptep) {
for_each_rmap_spte(rmap_head, &iter, sptep) {
rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
sptep, *sptep, gfn, level);
@ -1433,11 +1416,11 @@ struct slot_rmap_walk_iterator {
/* output fields. */
gfn_t gfn;
unsigned long *rmap;
struct kvm_rmap_head *rmap;
int level;
/* private field. */
unsigned long *end_rmap;
struct kvm_rmap_head *end_rmap;
};
static void
@ -1496,7 +1479,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
unsigned long end,
unsigned long data,
int (*handler)(struct kvm *kvm,
unsigned long *rmapp,
struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot,
gfn_t gfn,
int level,
@ -1540,7 +1523,8 @@ static int kvm_handle_hva_range(struct kvm *kvm,
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
unsigned long data,
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
int (*handler)(struct kvm *kvm,
struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot,
gfn_t gfn, int level,
unsigned long data))
@ -1563,7 +1547,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
}
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
unsigned long data)
{
@ -1573,18 +1557,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
BUG_ON(!shadow_accessed_mask);
for_each_rmap_spte(rmapp, &iter, sptep)
for_each_rmap_spte(rmap_head, &iter, sptep) {
if (*sptep & shadow_accessed_mask) {
young = 1;
clear_bit((ffs(shadow_accessed_mask) - 1),
(unsigned long *)sptep);
}
}
trace_kvm_age_page(gfn, level, slot, young);
return young;
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
int level, unsigned long data)
{
@ -1600,11 +1585,12 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
if (!shadow_accessed_mask)
goto out;
for_each_rmap_spte(rmapp, &iter, sptep)
for_each_rmap_spte(rmap_head, &iter, sptep) {
if (*sptep & shadow_accessed_mask) {
young = 1;
break;
}
}
out:
return young;
}
@ -1613,14 +1599,14 @@ out:
static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{
unsigned long *rmapp;
struct kvm_rmap_head *rmap_head;
struct kvm_mmu_page *sp;
sp = page_header(__pa(spte));
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp);
rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
@ -1720,8 +1706,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
mmu_spte_clear_no_track(parent_pte);
}
static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
u64 *parent_pte, int direct)
static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct)
{
struct kvm_mmu_page *sp;
@ -1737,8 +1722,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
* this feature. See the comments in kvm_zap_obsolete_pages().
*/
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
sp->parent_ptes = 0;
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
kvm_mod_used_mmu_pages(vcpu->kvm, +1);
return sp;
}
@ -1746,7 +1729,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
static void mark_unsync(u64 *spte);
static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
{
pte_list_walk(&sp->parent_ptes, mark_unsync);
u64 *sptep;
struct rmap_iterator iter;
for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
mark_unsync(sptep);
}
}
static void mark_unsync(u64 *spte)
@ -1806,6 +1794,13 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
return (pvec->nr == KVM_PAGE_ARRAY_NR);
}
static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
{
--sp->unsync_children;
WARN_ON((int)sp->unsync_children < 0);
__clear_bit(idx, sp->unsync_child_bitmap);
}
static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
struct kvm_mmu_pages *pvec)
{
@ -1815,8 +1810,10 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
struct kvm_mmu_page *child;
u64 ent = sp->spt[i];
if (!is_shadow_present_pte(ent) || is_large_pte(ent))
goto clear_child_bitmap;
if (!is_shadow_present_pte(ent) || is_large_pte(ent)) {
clear_unsync_child_bit(sp, i);
continue;
}
child = page_header(ent & PT64_BASE_ADDR_MASK);
@ -1825,28 +1822,21 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
return -ENOSPC;
ret = __mmu_unsync_walk(child, pvec);
if (!ret)
goto clear_child_bitmap;
else if (ret > 0)
if (!ret) {
clear_unsync_child_bit(sp, i);
continue;
} else if (ret > 0) {
nr_unsync_leaf += ret;
else
} else
return ret;
} else if (child->unsync) {
nr_unsync_leaf++;
if (mmu_pages_add(pvec, child, i))
return -ENOSPC;
} else
goto clear_child_bitmap;
continue;
clear_child_bitmap:
__clear_bit(i, sp->unsync_child_bitmap);
sp->unsync_children--;
WARN_ON((int)sp->unsync_children < 0);
clear_unsync_child_bit(sp, i);
}
return nr_unsync_leaf;
}
@ -2009,9 +1999,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
if (!sp)
return;
--sp->unsync_children;
WARN_ON((int)sp->unsync_children < 0);
__clear_bit(idx, sp->unsync_child_bitmap);
clear_unsync_child_bit(sp, idx);
level++;
} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
}
@ -2053,14 +2041,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
}
}
static void init_shadow_page_table(struct kvm_mmu_page *sp)
{
int i;
for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
sp->spt[i] = 0ull;
}
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
{
sp->write_flooding_count = 0;
@ -2083,8 +2063,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gva_t gaddr,
unsigned level,
int direct,
unsigned access,
u64 *parent_pte)
unsigned access)
{
union kvm_mmu_page_role role;
unsigned quadrant;
@ -2116,21 +2095,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
break;
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
if (sp->unsync_children) {
if (sp->unsync_children)
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
kvm_mmu_mark_parents_unsync(sp);
} else if (sp->unsync)
kvm_mmu_mark_parents_unsync(sp);
__clear_sp_write_flooding_count(sp);
trace_kvm_mmu_get_page(sp, false);
return sp;
}
++vcpu->kvm->stat.mmu_cache_miss;
sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
if (!sp)
return sp;
sp = kvm_mmu_alloc_page(vcpu, direct);
sp->gfn = gfn;
sp->role = role;
hlist_add_head(&sp->hash_link,
@ -2144,7 +2120,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
account_shadowed(vcpu->kvm, sp);
}
sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
init_shadow_page_table(sp);
clear_page(sp->spt);
trace_kvm_mmu_get_page(sp, true);
return sp;
}
@ -2198,7 +2174,8 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
return __shadow_walk_next(iterator, *iterator->sptep);
}
static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed)
static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
struct kvm_mmu_page *sp)
{
u64 spte;
@ -2206,12 +2183,14 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed)
VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
shadow_user_mask | shadow_x_mask;
if (accessed)
spte |= shadow_accessed_mask;
shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
mmu_spte_set(sptep, spte);
mmu_page_add_parent_pte(vcpu, sp, sptep);
if (sp->unsync_children || sp->unsync)
mark_unsync(sptep);
}
static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
@ -2270,17 +2249,12 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
mmu_page_zap_pte(kvm, sp, sp->spt + i);
}
static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
{
mmu_page_remove_parent_pte(sp, parent_pte);
}
static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
{
u64 *sptep;
struct rmap_iterator iter;
while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
drop_parent_pte(sp, sptep);
}
@ -2564,18 +2538,18 @@ done:
return ret;
}
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int write_fault, int *emulate,
int level, gfn_t gfn, pfn_t pfn, bool speculative,
bool host_writable)
static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
int write_fault, int level, gfn_t gfn, pfn_t pfn,
bool speculative, bool host_writable)
{
int was_rmapped = 0;
int rmap_count;
bool emulate = false;
pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
*sptep, write_fault, gfn);
if (is_rmap_spte(*sptep)) {
if (is_shadow_present_pte(*sptep)) {
/*
* If we overwrite a PTE page pointer with a 2MB PMD, unlink
* the parent of the now unreachable PTE.
@ -2600,12 +2574,12 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
true, host_writable)) {
if (write_fault)
*emulate = 1;
emulate = true;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (unlikely(is_mmio_spte(*sptep) && emulate))
*emulate = 1;
if (unlikely(is_mmio_spte(*sptep)))
emulate = true;
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
@ -2624,6 +2598,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
kvm_release_pfn_clean(pfn);
return emulate;
}
static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
@ -2658,9 +2634,8 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
return -1;
for (i = 0; i < ret; i++, gfn++, start++)
mmu_set_spte(vcpu, start, access, 0, NULL,
sp->role.level, gfn, page_to_pfn(pages[i]),
true, true);
mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
page_to_pfn(pages[i]), true, true);
return 0;
}
@ -2708,9 +2683,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
int map_writable, int level, gfn_t gfn, pfn_t pfn,
bool prefault)
static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
int level, gfn_t gfn, pfn_t pfn, bool prefault)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
@ -2722,9 +2696,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
write, &emulate, level, gfn, pfn,
prefault, map_writable);
emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
write, level, gfn, pfn, prefault,
map_writable);
direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed;
break;
@ -2737,10 +2711,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
pseudo_gfn = base_addr >> PAGE_SHIFT;
sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
iterator.level - 1,
1, ACC_ALL, iterator.sptep);
iterator.level - 1, 1, ACC_ALL);
link_shadow_page(iterator.sptep, sp, true);
link_shadow_page(vcpu, iterator.sptep, sp);
}
}
return emulate;
@ -2919,7 +2892,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
* If the mapping has been changed, let the vcpu fault on the
* same address again.
*/
if (!is_rmap_spte(spte)) {
if (!is_shadow_present_pte(spte)) {
ret = true;
goto exit;
}
@ -3018,11 +2991,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
prefault);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
out_unlock:
@ -3097,8 +3068,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
spin_lock(&vcpu->kvm->mmu_lock);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
1, ACC_ALL, NULL);
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = __pa(sp->spt);
@ -3110,9 +3080,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
i << 30,
PT32_ROOT_LEVEL, 1, ACC_ALL,
NULL);
i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
@ -3149,7 +3117,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
0, ACC_ALL, NULL);
0, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
@ -3182,9 +3150,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
}
spin_lock(&vcpu->kvm->mmu_lock);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, 0,
ACC_ALL, NULL);
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
0, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
@ -3531,8 +3498,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, gpa, write, map_writable,
level, gfn, pfn, prefault);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
@ -4058,10 +4024,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
g_context->inject_page_fault = kvm_inject_page_fault;
/*
* Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The
* translation of l2_gpa to l1_gpa addresses is done using the
* arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa
* functions between mmu and nested_mmu are swapped.
* Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using
* L1's nested page tables (e.g. EPT12). The nested translation
* of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using
* L2's page tables as the first level of translation and L1's
* nested page tables as the second level of translation. Basically
* the gva_to_gpa functions between mmu and nested_mmu are swapped.
*/
if (!is_paging(vcpu)) {
g_context->nx = false;
@ -4495,7 +4463,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
}
/* The return value indicates if tlb flush on all vcpus is needed. */
typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap);
typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
/* The caller should hold mmu-lock before calling this function. */
static bool
@ -4589,9 +4557,10 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
spin_unlock(&kvm->mmu_lock);
}
static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp)
static bool slot_rmap_write_protect(struct kvm *kvm,
struct kvm_rmap_head *rmap_head)
{
return __rmap_write_protect(kvm, rmapp, false);
return __rmap_write_protect(kvm, rmap_head, false);
}
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@ -4627,7 +4596,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
}
static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
unsigned long *rmapp)
struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
struct rmap_iterator iter;
@ -4636,7 +4605,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
struct kvm_mmu_page *sp;
restart:
for_each_rmap_spte(rmapp, &iter, sptep) {
for_each_rmap_spte(rmap_head, &iter, sptep) {
sp = page_header(__pa(sptep));
pfn = spte_to_pfn(*sptep);

View File

@ -129,7 +129,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
unsigned long *rmapp;
struct kvm_rmap_head *rmap_head;
struct kvm_mmu_page *rev_sp;
struct kvm_memslots *slots;
struct kvm_memory_slot *slot;
@ -150,8 +150,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
return;
}
rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
if (!*rmapp) {
rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
if (!rmap_head->val) {
if (!__ratelimit(&ratelimit_state))
return;
audit_printk(kvm, "no rmap for writable spte %llx\n",
@ -183,7 +183,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
return;
for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
if (!is_rmap_spte(sp->spt[i]))
if (!is_shadow_present_pte(sp->spt[i]))
continue;
inspect_spte_has_rmap(kvm, sp->spt + i);
@ -192,7 +192,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
unsigned long *rmapp;
struct kvm_rmap_head *rmap_head;
u64 *sptep;
struct rmap_iterator iter;
struct kvm_memslots *slots;
@ -203,13 +203,14 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, sp->gfn);
rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);
for_each_rmap_spte(rmapp, &iter, sptep)
for_each_rmap_spte(rmap_head, &iter, sptep) {
if (is_writable_pte(*sptep))
audit_printk(kvm, "shadow page has writable "
"mappings: gfn %llx role %x\n",
sp->gfn, sp->role.word);
}
}
static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)

View File

@ -475,8 +475,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* we call mmu_set_spte() with host_writable = true because
* pte_prefetch_gfn_to_pfn always gets a writable pfn.
*/
mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL,
gfn, pfn, true, true);
mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
true, true);
return true;
}
@ -556,7 +556,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
int top_level, emulate = 0;
int top_level, emulate;
direct_access = gw->pte_access;
@ -587,7 +587,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (!is_shadow_present_pte(*it.sptep)) {
table_gfn = gw->table_gfn[it.level - 2];
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
false, access, it.sptep);
false, access);
}
/*
@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
goto out_gpte_changed;
if (sp)
link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK);
link_shadow_page(vcpu, it.sptep, sp);
}
for (;
@ -617,20 +617,18 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
true, direct_access, it.sptep);
link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK);
true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
}
clear_sp_write_flooding_count(it.sptep);
mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate,
it.level, gw->gfn, pfn, prefault, map_writable);
emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
it.level, gw->gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
return emulate;
out_gpte_changed:
if (sp)
kvm_mmu_put_page(sp, it.sptep);
kvm_release_pfn_clean(pfn);
return 0;
}

View File

@ -86,6 +86,7 @@ static const u32 host_save_user_msrs[] = {
MSR_FS_BASE,
#endif
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_TSC_AUX,
};
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
@ -135,6 +136,7 @@ struct vcpu_svm {
uint64_t asid_generation;
uint64_t sysenter_esp;
uint64_t sysenter_eip;
uint64_t tsc_aux;
u64 next_rip;
@ -1238,6 +1240,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
}
}
/* This assumes that the kernel never uses MSR_TSC_AUX */
if (static_cpu_has(X86_FEATURE_RDTSCP))
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@ -3024,6 +3029,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
msr_info->data = svm->sysenter_esp;
break;
case MSR_TSC_AUX:
if (!boot_cpu_has(X86_FEATURE_RDTSCP))
return 1;
msr_info->data = svm->tsc_aux;
break;
/*
* Nobody will change the following 5 values in the VMCB so we can
* safely return them on rdmsr. They will always be 0 until LBRV is
@ -3162,6 +3172,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->sysenter_esp = data;
svm->vmcb->save.sysenter_esp = data;
break;
case MSR_TSC_AUX:
if (!boot_cpu_has(X86_FEATURE_RDTSCP))
return 1;
/*
* This is rare, so we update the MSR here instead of using
* direct_access_msrs. Doing that would require a rdmsr in
* svm_vcpu_put.
*/
svm->tsc_aux = data;
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
break;
case MSR_IA32_DEBUGCTLMSR:
if (!boot_cpu_has(X86_FEATURE_LBRV)) {
vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
@ -3578,12 +3600,16 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
return;
}
static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
static bool svm_get_enable_apicv(void)
{
return 0;
return false;
}
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu)
static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
}
static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
return;
}
@ -4054,7 +4080,7 @@ static int svm_get_lpage_level(void)
static bool svm_rdtscp_supported(void)
{
return false;
return boot_cpu_has(X86_FEATURE_RDTSCP);
}
static bool svm_invpcid_supported(void)
@ -4345,7 +4371,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
.cpu_uses_apicv = svm_cpu_uses_apicv,
.get_enable_apicv = svm_get_enable_apicv,
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = svm_load_eoi_exitmap,
.sync_pir_to_irr = svm_sync_pir_to_irr,

View File

@ -268,7 +268,7 @@ TRACE_EVENT(kvm_inj_virq,
#define kvm_trace_sym_exc \
EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \
EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \
EXS(MF), EXS(MC)
EXS(MF), EXS(AC), EXS(MC)
/*
* Tracepoint for kvm interrupt injection:
@ -1025,6 +1025,269 @@ TRACE_EVENT(kvm_pi_irte_update,
__entry->pi_desc_addr)
);
/*
* Tracepoint for kvm_hv_notify_acked_sint.
*/
TRACE_EVENT(kvm_hv_notify_acked_sint,
TP_PROTO(int vcpu_id, u32 sint),
TP_ARGS(vcpu_id, sint),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(u32, sint)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->sint = sint;
),
TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint)
);
/*
* Tracepoint for synic_set_irq.
*/
TRACE_EVENT(kvm_hv_synic_set_irq,
TP_PROTO(int vcpu_id, u32 sint, int vector, int ret),
TP_ARGS(vcpu_id, sint, vector, ret),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(u32, sint)
__field(int, vector)
__field(int, ret)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->sint = sint;
__entry->vector = vector;
__entry->ret = ret;
),
TP_printk("vcpu_id %d sint %u vector %d ret %d",
__entry->vcpu_id, __entry->sint, __entry->vector,
__entry->ret)
);
/*
* Tracepoint for kvm_hv_synic_send_eoi.
*/
TRACE_EVENT(kvm_hv_synic_send_eoi,
TP_PROTO(int vcpu_id, int vector),
TP_ARGS(vcpu_id, vector),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(u32, sint)
__field(int, vector)
__field(int, ret)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->vector = vector;
),
TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector)
);
/*
* Tracepoint for synic_set_msr.
*/
TRACE_EVENT(kvm_hv_synic_set_msr,
TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host),
TP_ARGS(vcpu_id, msr, data, host),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(u32, msr)
__field(u64, data)
__field(bool, host)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->msr = msr;
__entry->data = data;
__entry->host = host
),
TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d",
__entry->vcpu_id, __entry->msr, __entry->data, __entry->host)
);
/*
* Tracepoint for stimer_set_config.
*/
TRACE_EVENT(kvm_hv_stimer_set_config,
TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host),
TP_ARGS(vcpu_id, timer_index, config, host),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(int, timer_index)
__field(u64, config)
__field(bool, host)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->timer_index = timer_index;
__entry->config = config;
__entry->host = host;
),
TP_printk("vcpu_id %d timer %d config 0x%llx host %d",
__entry->vcpu_id, __entry->timer_index, __entry->config,
__entry->host)
);
/*
* Tracepoint for stimer_set_count.
*/
TRACE_EVENT(kvm_hv_stimer_set_count,
TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host),
TP_ARGS(vcpu_id, timer_index, count, host),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(int, timer_index)
__field(u64, count)
__field(bool, host)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->timer_index = timer_index;
__entry->count = count;
__entry->host = host;
),
TP_printk("vcpu_id %d timer %d count %llu host %d",
__entry->vcpu_id, __entry->timer_index, __entry->count,
__entry->host)
);
/*
* Tracepoint for stimer_start(periodic timer case).
*/
TRACE_EVENT(kvm_hv_stimer_start_periodic,
TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time),
TP_ARGS(vcpu_id, timer_index, time_now, exp_time),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(int, timer_index)
__field(u64, time_now)
__field(u64, exp_time)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->timer_index = timer_index;
__entry->time_now = time_now;
__entry->exp_time = exp_time;
),
TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu",
__entry->vcpu_id, __entry->timer_index, __entry->time_now,
__entry->exp_time)
);
/*
* Tracepoint for stimer_start(one-shot timer case).
*/
TRACE_EVENT(kvm_hv_stimer_start_one_shot,
TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count),
TP_ARGS(vcpu_id, timer_index, time_now, count),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(int, timer_index)
__field(u64, time_now)
__field(u64, count)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->timer_index = timer_index;
__entry->time_now = time_now;
__entry->count = count;
),
TP_printk("vcpu_id %d timer %d time_now %llu count %llu",
__entry->vcpu_id, __entry->timer_index, __entry->time_now,
__entry->count)
);
/*
* Tracepoint for stimer_timer_callback.
*/
TRACE_EVENT(kvm_hv_stimer_callback,
TP_PROTO(int vcpu_id, int timer_index),
TP_ARGS(vcpu_id, timer_index),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(int, timer_index)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->timer_index = timer_index;
),
TP_printk("vcpu_id %d timer %d",
__entry->vcpu_id, __entry->timer_index)
);
/*
* Tracepoint for stimer_expiration.
*/
TRACE_EVENT(kvm_hv_stimer_expiration,
TP_PROTO(int vcpu_id, int timer_index, int msg_send_result),
TP_ARGS(vcpu_id, timer_index, msg_send_result),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(int, timer_index)
__field(int, msg_send_result)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->timer_index = timer_index;
__entry->msg_send_result = msg_send_result;
),
TP_printk("vcpu_id %d timer %d msg send result %d",
__entry->vcpu_id, __entry->timer_index,
__entry->msg_send_result)
);
/*
* Tracepoint for stimer_cleanup.
*/
TRACE_EVENT(kvm_hv_stimer_cleanup,
TP_PROTO(int vcpu_id, int timer_index),
TP_ARGS(vcpu_id, timer_index),
TP_STRUCT__entry(
__field(int, vcpu_id)
__field(int, timer_index)
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
__entry->timer_index = timer_index;
),
TP_printk("vcpu_id %d timer %d",
__entry->vcpu_id, __entry->timer_index)
);
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH

View File

@ -19,6 +19,7 @@
#include "irq.h"
#include "mmu.h"
#include "cpuid.h"
#include "lapic.h"
#include <linux/kvm_host.h>
#include <linux/module.h>
@ -862,7 +863,6 @@ static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
static bool vmx_mpx_supported(void);
static bool vmx_xsaves_supported(void);
static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
@ -870,7 +870,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
static bool guest_state_valid(struct kvm_vcpu *vcpu);
static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
static int alloc_identity_pagetable(struct kvm *kvm);
@ -1448,7 +1447,51 @@ static inline void ept_sync_context(u64 eptp)
}
}
static __always_inline unsigned long vmcs_readl(unsigned long field)
static __always_inline void vmcs_check16(unsigned long field)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
"16-bit accessor invalid for 64-bit field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
"16-bit accessor invalid for 64-bit high field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
"16-bit accessor invalid for 32-bit high field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
"16-bit accessor invalid for natural width field");
}
static __always_inline void vmcs_check32(unsigned long field)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
"32-bit accessor invalid for 16-bit field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
"32-bit accessor invalid for natural width field");
}
static __always_inline void vmcs_check64(unsigned long field)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
"64-bit accessor invalid for 16-bit field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
"64-bit accessor invalid for 64-bit high field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
"64-bit accessor invalid for 32-bit field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000,
"64-bit accessor invalid for natural width field");
}
static __always_inline void vmcs_checkl(unsigned long field)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0,
"Natural width accessor invalid for 16-bit field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
"Natural width accessor invalid for 64-bit field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001,
"Natural width accessor invalid for 64-bit high field");
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000,
"Natural width accessor invalid for 32-bit field");
}
static __always_inline unsigned long __vmcs_readl(unsigned long field)
{
unsigned long value;
@ -1459,23 +1502,32 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
static __always_inline u16 vmcs_read16(unsigned long field)
{
return vmcs_readl(field);
vmcs_check16(field);
return __vmcs_readl(field);
}
static __always_inline u32 vmcs_read32(unsigned long field)
{
return vmcs_readl(field);
vmcs_check32(field);
return __vmcs_readl(field);
}
static __always_inline u64 vmcs_read64(unsigned long field)
{
vmcs_check64(field);
#ifdef CONFIG_X86_64
return vmcs_readl(field);
return __vmcs_readl(field);
#else
return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32);
return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32);
#endif
}
static __always_inline unsigned long vmcs_readl(unsigned long field)
{
vmcs_checkl(field);
return __vmcs_readl(field);
}
static noinline void vmwrite_error(unsigned long field, unsigned long value)
{
printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
@ -1483,7 +1535,7 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value)
dump_stack();
}
static void vmcs_writel(unsigned long field, unsigned long value)
static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
{
u8 error;
@ -1493,33 +1545,46 @@ static void vmcs_writel(unsigned long field, unsigned long value)
vmwrite_error(field, value);
}
static void vmcs_write16(unsigned long field, u16 value)
static __always_inline void vmcs_write16(unsigned long field, u16 value)
{
vmcs_writel(field, value);
vmcs_check16(field);
__vmcs_writel(field, value);
}
static void vmcs_write32(unsigned long field, u32 value)
static __always_inline void vmcs_write32(unsigned long field, u32 value)
{
vmcs_writel(field, value);
vmcs_check32(field);
__vmcs_writel(field, value);
}
static void vmcs_write64(unsigned long field, u64 value)
static __always_inline void vmcs_write64(unsigned long field, u64 value)
{
vmcs_writel(field, value);
vmcs_check64(field);
__vmcs_writel(field, value);
#ifndef CONFIG_X86_64
asm volatile ("");
vmcs_writel(field+1, value >> 32);
__vmcs_writel(field+1, value >> 32);
#endif
}
static void vmcs_clear_bits(unsigned long field, u32 mask)
static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
{
vmcs_writel(field, vmcs_readl(field) & ~mask);
vmcs_checkl(field);
__vmcs_writel(field, value);
}
static void vmcs_set_bits(unsigned long field, u32 mask)
static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
{
vmcs_writel(field, vmcs_readl(field) | mask);
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_clear_bits does not support 64-bit fields");
__vmcs_writel(field, __vmcs_readl(field) & ~mask);
}
static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_set_bits does not support 64-bit fields");
__vmcs_writel(field, __vmcs_readl(field) | mask);
}
static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
@ -2498,7 +2563,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
if (vmx_cpu_uses_apicv(&vmx->vcpu))
if (kvm_vcpu_apicv_active(&vmx->vcpu))
vmx->nested.nested_vmx_pinbased_ctls_high |=
PIN_BASED_POSTED_INTR;
@ -4462,9 +4527,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
msr, MSR_TYPE_W);
}
static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu)
static bool vmx_get_enable_apicv(void)
{
return enable_apicv && lapic_in_kernel(vcpu);
return enable_apicv;
}
static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
@ -4586,11 +4651,6 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
}
static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
{
return;
}
/*
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
* will not change in the lifetime of the guest.
@ -4660,11 +4720,18 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{
u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
if (!vmx_cpu_uses_apicv(&vmx->vcpu))
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
return pin_based_exec_ctrl;
}
static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
}
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@ -4703,7 +4770,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
if (!ple_gap)
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
if (!vmx_cpu_uses_apicv(&vmx->vcpu))
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
@ -4767,7 +4834,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
vmx_secondary_exec_control(vmx));
if (vmx_cpu_uses_apicv(&vmx->vcpu)) {
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
vmcs_write64(EOI_EXIT_BITMAP0, 0);
vmcs_write64(EOI_EXIT_BITMAP1, 0);
vmcs_write64(EOI_EXIT_BITMAP2, 0);
@ -4775,7 +4842,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write16(GUEST_INTR_STATUS, 0);
vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
}
@ -4867,7 +4934,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
seg_setup(VCPU_SREG_CS);
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
vmcs_write32(GUEST_CS_BASE, 0xffff0000);
vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
seg_setup(VCPU_SREG_DS);
seg_setup(VCPU_SREG_ES);
@ -4903,7 +4970,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
setup_msrs(vmx);
@ -4919,7 +4986,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
if (vmx_cpu_uses_apicv(vcpu))
if (kvm_vcpu_apicv_active(vcpu))
memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
if (vmx->vpid != 0)
@ -6203,15 +6270,6 @@ static __init int hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 48;
}
if (enable_apicv)
kvm_x86_ops->update_cr8_intercept = NULL;
else {
kvm_x86_ops->hwapic_irr_update = NULL;
kvm_x86_ops->hwapic_isr_update = NULL;
kvm_x86_ops->deliver_posted_interrupt = NULL;
kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
}
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@ -7901,7 +7959,7 @@ static void dump_vmcs(void)
u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
u32 secondary_exec_control = 0;
unsigned long cr4 = vmcs_readl(GUEST_CR4);
u64 efer = vmcs_readl(GUEST_IA32_EFER);
u64 efer = vmcs_read64(GUEST_IA32_EFER);
int i, n;
if (cpu_has_secondary_exec_ctrls())
@ -7917,10 +7975,10 @@ static void dump_vmcs(void)
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
(cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
{
pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n",
vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1));
pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n",
vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3));
pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
}
pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
@ -7941,16 +7999,16 @@ static void dump_vmcs(void)
vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
(vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
pr_err("EFER = 0x%016llx PAT = 0x%016lx\n",
efer, vmcs_readl(GUEST_IA32_PAT));
pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n",
vmcs_readl(GUEST_IA32_DEBUGCTL),
pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
efer, vmcs_read64(GUEST_IA32_PAT));
pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
vmcs_read64(GUEST_IA32_DEBUGCTL),
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016lx\n",
vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL));
pr_err("PerfGlobCtl = 0x%016llx\n",
vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS));
pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
pr_err("Interruptibility = %08x ActivityState = %08x\n",
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
vmcs_read32(GUEST_ACTIVITY_STATE));
@ -7979,11 +8037,12 @@ static void dump_vmcs(void)
vmcs_read32(HOST_IA32_SYSENTER_CS),
vmcs_readl(HOST_IA32_SYSENTER_EIP));
if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
pr_err("EFER = 0x%016lx PAT = 0x%016lx\n",
vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT));
pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
vmcs_read64(HOST_IA32_EFER),
vmcs_read64(HOST_IA32_PAT));
if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016lx\n",
vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL));
pr_err("PerfGlobCtl = 0x%016llx\n",
vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
pr_err("*** Control State ***\n");
pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
@ -8006,16 +8065,16 @@ static void dump_vmcs(void)
pr_err("IDTVectoring: info=%08x errcode=%08x\n",
vmcs_read32(IDT_VECTORING_INFO_FIELD),
vmcs_read32(IDT_VECTORING_ERROR_CODE));
pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET));
pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
pr_err("TSC Multiplier = 0x%016lx\n",
vmcs_readl(TSC_MULTIPLIER));
pr_err("TSC Multiplier = 0x%016llx\n",
vmcs_read64(TSC_MULTIPLIER));
if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER));
pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
n = vmcs_read32(CR3_TARGET_COUNT);
for (i = 0; i + 1 < n; i += 4)
pr_err("CR3 target%u=%016lx target%u=%016lx\n",
@ -8154,7 +8213,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
* apicv
*/
if (!cpu_has_vmx_virtualize_x2apic_mode() ||
!vmx_cpu_uses_apicv(vcpu))
!kvm_vcpu_apicv_active(vcpu))
return;
if (!cpu_need_tpr_shadow(vcpu))
@ -8259,10 +8318,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
}
}
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu)
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap;
if (!vmx_cpu_uses_apicv(vcpu))
if (!kvm_vcpu_apicv_active(vcpu))
return;
vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
@ -8932,7 +8990,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
best->ebx &= ~bit(X86_FEATURE_INVPCID);
}
vmcs_set_secondary_exec_control(secondary_exec_ctl);
if (cpu_has_secondary_exec_ctrls())
vmcs_set_secondary_exec_control(secondary_exec_ctl);
if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
if (guest_cpuid_has_pcommit(vcpu))
@ -9508,7 +9567,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
*/
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
vmx->nested.pi_pending = false;
vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write64(POSTED_INTR_DESC_ADDR,
page_to_phys(vmx->nested.pi_desc_page) +
(unsigned long)(vmcs12->posted_intr_desc_addr &
@ -10169,7 +10228,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* Additionally, restore L2's PDPTR to vmcs12.
*/
if (enable_ept) {
vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3);
vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
@ -10805,7 +10864,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.cpu_uses_apicv = vmx_cpu_uses_apicv,
.get_enable_apicv = vmx_get_enable_apicv,
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,

View File

@ -951,7 +951,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
};
static unsigned num_msrs_to_save;
@ -966,6 +966,8 @@ static u32 emulated_msrs[] = {
HV_X64_MSR_RESET,
HV_X64_MSR_VP_INDEX,
HV_X64_MSR_VP_RUNTIME,
HV_X64_MSR_SCONTROL,
HV_X64_MSR_STIMER0_CONFIG,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
@ -1167,7 +1169,8 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
++version;
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
return;
/*
* The guest calculates current wall clock time by adding
@ -1683,6 +1686,11 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
#endif
}
void kvm_make_mclock_inprogress_request(struct kvm *kvm)
{
kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
}
static void kvm_gen_update_masterclock(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
@ -2198,6 +2206,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
return kvm_hv_set_msr_common(vcpu, msr, data,
msr_info->host_initiated);
case MSR_IA32_BBL_CR_CTL3:
@ -2402,6 +2411,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
return kvm_hv_get_msr_common(vcpu,
msr_info->index, &msr_info->data);
break;
@ -2541,6 +2551,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV:
case KVM_CAP_HYPERV_VAPIC:
case KVM_CAP_HYPERV_SPIN:
case KVM_CAP_HYPERV_SYNIC:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@ -2693,6 +2704,11 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
return kvm_arch_has_noncoherent_dma(vcpu->kvm);
}
static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Address WBINVD may be executed by guest */
@ -2748,7 +2764,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
kvm_x86_ops->sync_pir_to_irr(vcpu);
if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
return 0;
@ -3191,6 +3209,20 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
return 0;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
{
if (cap->flags)
return -EINVAL;
switch (cap->cap) {
case KVM_CAP_HYPERV_SYNIC:
return kvm_hv_activate_synic(vcpu);
default:
return -EINVAL;
}
}
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@ -3455,6 +3487,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_set_guest_paused(vcpu);
goto out;
}
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;
r = -EFAULT;
if (copy_from_user(&cap, argp, sizeof(cap)))
goto out;
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
default:
r = -EINVAL;
}
@ -4006,16 +4047,17 @@ static void kvm_init_msr_list(void)
/*
* Even MSRs that are valid in the host may not be exposed
* to the guests in some cases. We could work around this
* in VMX with the generic MSR save/load machinery, but it
* is not really worthwhile since it will really only
* happen with nested virtualization.
* to the guests in some cases.
*/
switch (msrs_to_save[i]) {
case MSR_IA32_BNDCFGS:
if (!kvm_x86_ops->mpx_supported())
continue;
break;
case MSR_TSC_AUX:
if (!kvm_x86_ops->rdtscp_supported())
continue;
break;
default:
break;
}
@ -5872,6 +5914,12 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}
void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
{
vcpu->arch.apicv_active = false;
kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@ -5965,6 +6013,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
if (!vcpu->arch.apic)
return;
if (vcpu->arch.apicv_active)
return;
if (!vcpu->arch.apic->vapic_addr)
max_irr = kvm_lapic_find_highest_irr(vcpu);
else
@ -6301,20 +6352,30 @@ static void process_smi(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
}
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
u64 eoi_exit_bitmap[4];
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
kvm_x86_ops->load_eoi_exitmap(vcpu);
bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
vcpu_to_synic(vcpu)->vec_bitmap, 256);
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
@ -6422,7 +6483,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
if (test_bit(vcpu->arch.pending_ioapic_eoi,
(void *) vcpu->arch.eoi_exit_bitmap)) {
vcpu->arch.ioapic_handled_vectors)) {
vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
vcpu->run->eoi.vector =
vcpu->arch.pending_ioapic_eoi;
@ -6446,6 +6507,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 0;
goto out;
}
if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
vcpu->run->hyperv = vcpu->arch.hyperv.exit;
r = 0;
goto out;
}
/*
* KVM_REQ_HV_STIMER has to be processed after
* KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
* depend on the guest clock being up-to-date
*/
if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
kvm_hv_process_stimers(vcpu);
}
/*
@ -6457,7 +6532,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* Update architecture specific hints for APIC
* virtual interrupt delivery.
*/
if (kvm_x86_ops->hwapic_irr_update)
if (vcpu->arch.apicv_active)
kvm_x86_ops->hwapic_irr_update(vcpu,
kvm_lapic_find_highest_irr(vcpu));
}
@ -7528,6 +7603,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
BUG_ON(vcpu->kvm == NULL);
kvm = vcpu->kvm;
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
@ -7585,6 +7661,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pending_external_vector = -1;
kvm_hv_vcpu_init(vcpu);
return 0;
fail_free_mce_banks:
@ -7603,6 +7681,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
int idx;
kvm_hv_vcpu_uninit(vcpu);
kvm_pmu_destroy(vcpu);
kfree(vcpu->arch.mce_banks);
kvm_free_lapic(vcpu);
@ -7997,6 +8076,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
kvm_cpu_has_interrupt(vcpu))
return true;
if (kvm_hv_has_stimer_pending(vcpu))
return true;
return false;
}

View File

@ -63,10 +63,6 @@ enum hv_cpuid_function {
/* Define version of the synthetic interrupt controller. */
#define HV_SYNIC_VERSION (1)
/* Define synthetic interrupt controller message constants. */
#define HV_MESSAGE_SIZE (256)
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
#define HV_ANY_VP (0xFFFFFFFF)
/* Define synthetic interrupt controller flag constants. */
@ -74,48 +70,9 @@ enum hv_cpuid_function {
#define HV_EVENT_FLAGS_BYTE_COUNT (256)
#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32))
/* Define hypervisor message types. */
enum hv_message_type {
HVMSG_NONE = 0x00000000,
/* Memory access messages. */
HVMSG_UNMAPPED_GPA = 0x80000000,
HVMSG_GPA_INTERCEPT = 0x80000001,
/* Timer notification messages. */
HVMSG_TIMER_EXPIRED = 0x80000010,
/* Error messages. */
HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
/* Trace buffer complete messages. */
HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
/* Platform-specific processor intercept messages. */
HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
HVMSG_X64_MSR_INTERCEPT = 0x80010001,
HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
HVMSG_X64_APIC_EOI = 0x80010004,
HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
};
#define HV_SYNIC_STIMER_COUNT (4)
/* Define invalid partition identifier. */
#define HV_PARTITION_ID_INVALID ((u64)0x0)
/* Define port identifier type. */
union hv_port_id {
u32 asu32;
struct {
u32 id:24;
u32 reserved:8;
} u ;
};
/* Define port type. */
enum hv_port_type {
HVPORT_MSG = 1,
@ -163,27 +120,6 @@ struct hv_connection_info {
};
};
/* Define synthetic interrupt controller message flags. */
union hv_message_flags {
u8 asu8;
struct {
u8 msg_pending:1;
u8 reserved:7;
};
};
/* Define synthetic interrupt controller message header. */
struct hv_message_header {
enum hv_message_type message_type;
u8 payload_size;
union hv_message_flags message_flags;
u8 reserved[2];
union {
u64 sender;
union hv_port_id port;
};
};
/*
* Timer configuration register.
*/
@ -200,31 +136,9 @@ union hv_timer_config {
};
};
/* Define timer message payload structure. */
struct hv_timer_message_payload {
u32 timer_index;
u32 reserved;
u64 expiration_time; /* When the timer expired */
u64 delivery_time; /* When the message was delivered */
};
/* Define synthetic interrupt controller message format. */
struct hv_message {
struct hv_message_header header;
union {
u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
} u ;
};
/* Define the number of message buffers associated with each port. */
#define HV_PORT_MESSAGE_BUFFER_COUNT (16)
/* Define the synthetic interrupt message page layout. */
struct hv_message_page {
struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
};
/* Define the synthetic interrupt controller event flags format. */
union hv_synic_event_flags {
u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT];
@ -347,7 +261,7 @@ enum hv_call_code {
struct hv_input_post_message {
union hv_connection_id connectionid;
u32 reserved;
enum hv_message_type message_type;
u32 message_type;
u32 payload_size;
u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
};

View File

@ -40,10 +40,14 @@ struct read_info_sccb {
u8 fac85; /* 85 */
u8 _pad_86[91 - 86]; /* 86-90 */
u8 flags; /* 91 */
u8 _pad_92[100 - 92]; /* 92-99 */
u8 _pad_92[99 - 92]; /* 92-98 */
u8 hamaxpow; /* 99 */
u32 rnsize2; /* 100-103 */
u64 rnmax2; /* 104-111 */
u8 _pad_112[120 - 112]; /* 112-119 */
u8 _pad_112[116 - 112]; /* 112-115 */
u8 fac116; /* 116 */
u8 _pad_117[119 - 117]; /* 117-118 */
u8 fac119; /* 119 */
u16 hcpua; /* 120-121 */
u8 _pad_122[4096 - 122]; /* 122-4095 */
} __packed __aligned(PAGE_SIZE);
@ -108,6 +112,8 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
sclp.facilities = sccb->facilities;
sclp.has_sprp = !!(sccb->fac84 & 0x02);
sclp.has_core_type = !!(sccb->fac84 & 0x01);
sclp.has_esca = !!(sccb->fac116 & 0x08);
sclp.has_hvs = !!(sccb->fac119 & 0x80);
if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
@ -115,6 +121,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
sclp.rzm <<= 20;
sclp.ibc = sccb->ibc;
if (sccb->hamaxpow && sccb->hamaxpow < 64)
sclp.hamax = (1UL << sccb->hamaxpow) - 1;
else
sclp.hamax = U64_MAX;
if (!sccb->hcpua) {
if (MACHINE_IS_VM)
sclp.max_cores = 64;
@ -131,6 +142,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
continue;
sclp.has_siif = cpue->siif;
sclp.has_sigpif = cpue->sigpif;
sclp.has_sief2 = cpue->sief2;
break;
}

View File

@ -23,6 +23,12 @@
#define ARCH_TIMER_CTRL_IT_MASK (1 << 1)
#define ARCH_TIMER_CTRL_IT_STAT (1 << 2)
#define CNTHCTL_EL1PCTEN (1 << 0)
#define CNTHCTL_EL1PCEN (1 << 1)
#define CNTHCTL_EVNTEN (1 << 2)
#define CNTHCTL_EVNTDIR (1 << 3)
#define CNTHCTL_EVNTI (0xF << 4)
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,
ARCH_TIMER_REG_TVAL,

View File

@ -279,6 +279,12 @@ struct vgic_v2_cpu_if {
u32 vgic_lr[VGIC_V2_MAX_LRS];
};
/*
* LRs are stored in reverse order in memory. make sure we index them
* correctly.
*/
#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
struct vgic_v3_cpu_if {
#ifdef CONFIG_KVM_ARM_VGIC_V3
u32 vgic_hcr;

View File

@ -111,38 +111,13 @@ static inline bool is_error_page(struct page *page)
}
/*
* vcpu->requests bit members
* Architecture-independent vcpu->requests bit members
* Bits 4-7 are reserved for more arch-independent bits.
*/
#define KVM_REQ_TLB_FLUSH 0
#define KVM_REQ_MIGRATE_TIMER 1
#define KVM_REQ_REPORT_TPR_ACCESS 2
#define KVM_REQ_MMU_RELOAD 3
#define KVM_REQ_TRIPLE_FAULT 4
#define KVM_REQ_PENDING_TIMER 5
#define KVM_REQ_UNHALT 6
#define KVM_REQ_MMU_SYNC 7
#define KVM_REQ_CLOCK_UPDATE 8
#define KVM_REQ_KICK 9
#define KVM_REQ_DEACTIVATE_FPU 10
#define KVM_REQ_EVENT 11
#define KVM_REQ_APF_HALT 12
#define KVM_REQ_STEAL_UPDATE 13
#define KVM_REQ_NMI 14
#define KVM_REQ_PMU 15
#define KVM_REQ_PMI 16
#define KVM_REQ_WATCHDOG 17
#define KVM_REQ_MASTERCLOCK_UPDATE 18
#define KVM_REQ_MCLOCK_INPROGRESS 19
#define KVM_REQ_EPR_EXIT 20
#define KVM_REQ_SCAN_IOAPIC 21
#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
#define KVM_REQ_ENABLE_IBS 23
#define KVM_REQ_DISABLE_IBS 24
#define KVM_REQ_APIC_PAGE_RELOAD 25
#define KVM_REQ_SMI 26
#define KVM_REQ_HV_CRASH 27
#define KVM_REQ_IOAPIC_EOI_EXIT 28
#define KVM_REQ_HV_RESET 29
#define KVM_REQ_MMU_RELOAD 1
#define KVM_REQ_PENDING_TIMER 2
#define KVM_REQ_UNHALT 3
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
@ -318,6 +293,11 @@ struct kvm_s390_adapter_int {
u32 adapter_id;
};
struct kvm_hv_sint {
u32 vcpu;
u32 sint;
};
struct kvm_kernel_irq_routing_entry {
u32 gsi;
u32 type;
@ -331,6 +311,7 @@ struct kvm_kernel_irq_routing_entry {
} irqchip;
struct msi_msg msi;
struct kvm_s390_adapter_int adapter;
struct kvm_hv_sint hv_sint;
};
struct hlist_node link;
};
@ -439,10 +420,13 @@ struct kvm {
/* The guest did something we don't support. */
#define vcpu_unimpl(vcpu, fmt, ...) \
kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt, \
(vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__)
#define vcpu_debug(vcpu, fmt, ...) \
kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
#define vcpu_err(vcpu, fmt, ...) \
kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
@ -465,6 +449,11 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
struct kvm_vcpu *vcpu;
int i;
if (id < 0 || id >= KVM_MAX_VCPUS)
return NULL;
vcpu = kvm_get_vcpu(kvm, id);
if (vcpu && vcpu->vcpu_id == id)
return vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
if (vcpu->vcpu_id == id)
return vcpu;
@ -484,12 +473,12 @@ void vcpu_put(struct kvm_vcpu *vcpu);
#ifdef __KVM_HAVE_IOAPIC
void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
void kvm_arch_irq_routing_update(struct kvm *kvm);
void kvm_arch_post_irq_routing_update(struct kvm *kvm);
#else
static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
{
}
static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
{
}
#endif
@ -634,7 +623,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
@ -668,8 +657,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request(struct kvm *kvm);
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
long kvm_arch_dev_ioctl(struct file *filp,
@ -990,11 +977,6 @@ static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
return kvm_is_error_hva(hva);
}
static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
}
enum kvm_stat_kind {
KVM_STAT_VM,
KVM_STAT_VCPU,
@ -1004,7 +986,6 @@ struct kvm_stats_debugfs_item {
const char *name;
int offset;
enum kvm_stat_kind kind;
struct dentry *dentry;
};
extern struct kvm_stats_debugfs_item debugfs_entries[];
extern struct dentry *kvm_debugfs_dir;
@ -1091,6 +1072,7 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
{
}
#endif
void kvm_arch_irq_routing_update(struct kvm *kvm);
static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{

View File

@ -4,10 +4,8 @@
#include <uapi/linux/kvm_para.h>
static inline int kvm_para_has_feature(unsigned int feature)
static inline bool kvm_para_has_feature(unsigned int feature)
{
if (kvm_arch_para_features() & (1UL << feature))
return 1;
return 0;
return !!(kvm_arch_para_features() & (1UL << feature));
}
#endif /* __LINUX_KVM_PARA_H */

View File

@ -154,6 +154,20 @@ struct kvm_s390_skeys {
__u32 flags;
__u32 reserved[9];
};
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
__u32 type;
union {
struct {
__u32 msr;
__u64 control;
__u64 evt_page;
__u64 msg_page;
} synic;
} u;
};
#define KVM_S390_GET_SKEYS_NONE 1
#define KVM_S390_SKEYS_MAX 1048576
@ -184,6 +198,7 @@ struct kvm_s390_skeys {
#define KVM_EXIT_SYSTEM_EVENT 24
#define KVM_EXIT_S390_STSI 25
#define KVM_EXIT_IOAPIC_EOI 26
#define KVM_EXIT_HYPERV 27
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@ -338,6 +353,8 @@ struct kvm_run {
struct {
__u8 vector;
} eoi;
/* KVM_EXIT_HYPERV */
struct kvm_hyperv_exit hyperv;
/* Fix the size of the union. */
char padding[256];
};
@ -831,6 +848,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_GUEST_DEBUG_HW_WPS 120
#define KVM_CAP_SPLIT_IRQCHIP 121
#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
#define KVM_CAP_HYPERV_SYNIC 123
#define KVM_CAP_S390_RI 124
#ifdef KVM_CAP_IRQ_ROUTING
@ -854,10 +873,16 @@ struct kvm_irq_routing_s390_adapter {
__u32 adapter_id;
};
struct kvm_irq_routing_hv_sint {
__u32 vcpu;
__u32 sint;
};
/* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
struct kvm_irq_routing_entry {
__u32 gsi;
@ -868,6 +893,7 @@ struct kvm_irq_routing_entry {
struct kvm_irq_routing_irqchip irqchip;
struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint;
__u32 pad[8];
} u;
};

View File

@ -28,6 +28,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
/* These are for GICv2 emulation only */
@ -36,18 +37,12 @@
#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
#define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1)
/*
* LRs are stored in reverse order in memory. make sure we index them
* correctly.
*/
#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
static u32 ich_vtr_el2;
static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
{
struct vgic_lr lr_desc;
u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)];
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
@ -111,7 +106,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
}
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val;
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);

View File

@ -878,7 +878,7 @@ static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
true);
}
struct kvm_io_device_ops vgic_io_ops = {
static struct kvm_io_device_ops vgic_io_ops = {
.read = vgic_handle_mmio_read,
.write = vgic_handle_mmio_write,
};

View File

@ -57,8 +57,7 @@ int kvm_async_pf_init(void)
void kvm_async_pf_deinit(void)
{
if (async_pf_cache)
kmem_cache_destroy(async_pf_cache);
kmem_cache_destroy(async_pf_cache);
async_pf_cache = NULL;
}

View File

@ -166,6 +166,10 @@ out:
return r;
}
void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
{
}
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *ue,
unsigned nr,
@ -219,9 +223,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
old = kvm->irq_routing;
rcu_assign_pointer(kvm->irq_routing, new);
kvm_irq_routing_update(kvm);
kvm_arch_irq_routing_update(kvm);
mutex_unlock(&kvm->irq_lock);
kvm_arch_irq_routing_update(kvm);
kvm_arch_post_irq_routing_update(kvm);
synchronize_srcu_expedited(&kvm->irq_srcu);

View File

@ -206,16 +206,6 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}
void kvm_make_mclock_inprogress_request(struct kvm *kvm)
{
kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
}
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
}
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
struct page *page;
@ -1164,15 +1154,15 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
}
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
memslot->flags & KVM_MEMSLOT_INVALID)
return 0;
return false;
return 1;
return true;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
@ -2257,7 +2247,7 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
{
int r;
struct kvm_vcpu *vcpu, *v;
struct kvm_vcpu *vcpu;
if (id >= KVM_MAX_VCPUS)
return -EINVAL;
@ -2281,12 +2271,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
r = -EINVAL;
goto unlock_vcpu_destroy;
}
kvm_for_each_vcpu(r, v, kvm)
if (v->vcpu_id == id) {
r = -EEXIST;
goto unlock_vcpu_destroy;
}
if (kvm_get_vcpu_by_id(kvm, id)) {
r = -EEXIST;
goto unlock_vcpu_destroy;
}
BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
@ -3449,10 +3437,9 @@ static int kvm_init_debug(void)
goto out;
for (p = debugfs_entries; p->name; ++p) {
p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]);
if (p->dentry == NULL)
if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))
goto out_dir;
}
@ -3464,15 +3451,6 @@ out:
return r;
}
static void kvm_exit_debug(void)
{
struct kvm_stats_debugfs_item *p;
for (p = debugfs_entries; p->name; ++p)
debugfs_remove(p->dentry);
debugfs_remove(kvm_debugfs_dir);
}
static int kvm_suspend(void)
{
if (kvm_usage_count)
@ -3630,7 +3608,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{
kvm_exit_debug();
debugfs_remove_recursive(kvm_debugfs_dir);
misc_deregister(&kvm_dev);
kmem_cache_destroy(kvm_vcpu_cache);
kvm_async_pf_deinit();