Mirror of https://github.com/torvalds/linux.git
KVM: arm64: nv: Implement nested Stage-2 page table walk logic
Based on the pseudo-code in the ARM ARM, implement a stage 2 software
page table walker.

Co-developed-by: Jintack Lim <jintack.lim@linaro.org>
Signed-off-by: Jintack Lim <jintack.lim@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240614144552.2773592-3-maz@kernel.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
parent 4f128f8e1a
commit 61e30b9eef
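As an aid to reading the walker added below, here is a small, self-contained sketch (not part of the patch; helper names are illustrative) of the index arithmetic it performs, for one example configuration: 4KB granule, VTCR_EL2.T0SZ = 24 (a 40-bit IPA space) and a walk starting at level 1.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel's GENMASK_ULL(h, l) */
static uint64_t genmask(unsigned int h, unsigned int l)
{
        return (~0ULL >> (63 - h)) & (~0ULL << l);
}

int main(void)
{
        unsigned int pgshift = 12;              /* 4KB granule */
        unsigned int t0sz = 24;                 /* 64 - 24 = 40-bit input size */
        unsigned int sl = 1;                    /* VTCR_EL2.SL0: start at level 1 */

        unsigned int stride = pgshift - 3;      /* index bits resolved per level */
        unsigned int input_size = 64 - t0sz;
        int level = 2 - sl;                     /* 4KB granule: level = 2 - SL0 */

        uint64_t ipa = 0xABCDEF0000ULL;         /* an arbitrary 40-bit IPA */
        unsigned int addr_top = input_size - 1;

        /* Same per-level index extraction as walk_nested_s2_pgd() below */
        for (; level <= 3; level++) {
                unsigned int addr_bottom = (3 - level) * stride + pgshift;
                uint64_t offset = (ipa & genmask(addr_top, addr_bottom))
                                  >> (addr_bottom - 3);

                printf("level %d: IPA bits [%u:%u] -> descriptor offset 0x%llx\n",
                       level, addr_top, addr_bottom,
                       (unsigned long long)offset);
                addr_top = addr_bottom - 1;
        }
        return 0;
}

The real walker stops as soon as it hits a block descriptor or a fault; this sketch only shows which IPA bits index each level and why the descriptor offset is the index shifted left by 3 (descriptors are 8 bytes).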
arch/arm64/include/asm/esr.h
@@ -152,6 +152,7 @@
#define ESR_ELx_Xs_MASK         (GENMASK_ULL(4, 0))

/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_FSC_ADDRSZ      (0x00)
#define ESR_ELx_CV              (UL(1) << 24)
#define ESR_ELx_COND_SHIFT      (20)
#define ESR_ELx_COND_MASK       (UL(0xF) << ESR_ELx_COND_SHIFT)
arch/arm64/include/asm/kvm_nested.h
@@ -68,6 +68,19 @@ extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);

struct kvm_s2_trans {
        phys_addr_t output;
        unsigned long block_size;
        bool writable;
        bool readable;
        int level;
        u32 esr;
        u64 upper_attr;
};

extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
                              struct kvm_s2_trans *result);

int kvm_init_nv_sysregs(struct kvm *kvm);

#ifdef CONFIG_ARM64_PTR_AUTH
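For context, a hedged, kernel-style sketch (not part of this patch) of how a caller might consume kvm_walk_nested_s2() and struct kvm_s2_trans. check_nested_write() is a hypothetical name; a real caller would forward trans.esr back to the guest hypervisor rather than return a bare error code.

/*
 * Illustrative only: a hypothetical caller of the new interface.
 * Assumes a vCPU with NV enabled; error handling is simplified.
 */
static int check_nested_write(struct kvm_vcpu *vcpu, phys_addr_t gipa,
                              phys_addr_t *canonical_ipa)
{
        struct kvm_s2_trans trans;
        int ret;

        ret = kvm_walk_nested_s2(vcpu, gipa, &trans);
        if (ret < 0)            /* descriptor read failed */
                return ret;
        if (ret)                /* the walk itself faulted; trans.esr holds the FSC */
                return -EFAULT;

        if (!trans.writable)    /* guest hypervisor mapped this read-only */
                return -EPERM;

        /* trans.output already includes the offset within the block/page */
        *canonical_ipa = trans.output;
        return 0;
}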
arch/arm64/kvm/nested.c
@@ -91,6 +91,270 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
        return 0;
}

struct s2_walk_info {
        int (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
        void *data;
        u64 baddr;
        unsigned int max_oa_bits;
        unsigned int pgshift;
        unsigned int sl;
        unsigned int t0sz;
        bool be;
};

static unsigned int ps_to_output_size(unsigned int ps)
{
        switch (ps) {
        case 0: return 32;
        case 1: return 36;
        case 2: return 40;
        case 3: return 42;
        case 4: return 44;
        case 5:
        default:
                return 48;
        }
}

static u32 compute_fsc(int level, u32 fsc)
{
        return fsc | (level & 0x3);
}

static int get_ia_size(struct s2_walk_info *wi)
{
        return 64 - wi->t0sz;
}

static int check_base_s2_limits(struct s2_walk_info *wi,
                                int level, int input_size, int stride)
{
        int start_size, ia_size;

        ia_size = get_ia_size(wi);

        /* Check translation limits */
        switch (BIT(wi->pgshift)) {
        case SZ_64K:
                if (level == 0 || (level == 1 && ia_size <= 42))
                        return -EFAULT;
                break;
        case SZ_16K:
                if (level == 0 || (level == 1 && ia_size <= 40))
                        return -EFAULT;
                break;
        case SZ_4K:
                if (level < 0 || (level == 0 && ia_size <= 42))
                        return -EFAULT;
                break;
        }

        /* Check input size limits */
        if (input_size > ia_size)
                return -EFAULT;

        /* Check number of entries in starting level table */
        start_size = input_size - ((3 - level) * stride + wi->pgshift);
        if (start_size < 1 || start_size > stride + 4)
                return -EFAULT;

        return 0;
}

/* Check if output is within boundaries */
static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
{
        unsigned int output_size = wi->max_oa_bits;

        if (output_size != 48 && (output & GENMASK_ULL(47, output_size)))
                return -1;

        return 0;
}

/*
 * This is essentially a C-version of the pseudo code from the ARM ARM
 * AArch64.TranslationTableWalk function. I strongly recommend looking at
 * that pseudocode in trying to understand this.
 *
 * Must be called with the kvm->srcu read lock held
 */
static int walk_nested_s2_pgd(phys_addr_t ipa,
                              struct s2_walk_info *wi, struct kvm_s2_trans *out)
{
        int first_block_level, level, stride, input_size, base_lower_bound;
        phys_addr_t base_addr;
        unsigned int addr_top, addr_bottom;
        u64 desc;  /* page table entry */
        int ret;
        phys_addr_t paddr;

        switch (BIT(wi->pgshift)) {
        default:
        case SZ_64K:
        case SZ_16K:
                level = 3 - wi->sl;
                first_block_level = 2;
                break;
        case SZ_4K:
                level = 2 - wi->sl;
                first_block_level = 1;
                break;
        }

        stride = wi->pgshift - 3;
        input_size = get_ia_size(wi);
        if (input_size > 48 || input_size < 25)
                return -EFAULT;

        ret = check_base_s2_limits(wi, level, input_size, stride);
        if (WARN_ON(ret))
                return ret;

        base_lower_bound = 3 + input_size - ((3 - level) * stride +
                           wi->pgshift);
        base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound);

        if (check_output_size(wi, base_addr)) {
                out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
                return 1;
        }

        addr_top = input_size - 1;

        while (1) {
                phys_addr_t index;

                addr_bottom = (3 - level) * stride + wi->pgshift;
                index = (ipa & GENMASK_ULL(addr_top, addr_bottom))
                        >> (addr_bottom - 3);

                paddr = base_addr | index;
                ret = wi->read_desc(paddr, &desc, wi->data);
                if (ret < 0)
                        return ret;

                /*
                 * Handle reversed descriptors if endianness differs between
                 * the host and the guest hypervisor.
                 */
                if (wi->be)
                        desc = be64_to_cpu((__force __be64)desc);
                else
                        desc = le64_to_cpu((__force __le64)desc);

                /* Check for valid descriptor at this point */
                if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
                        out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
                        out->upper_attr = desc;
                        return 1;
                }

                /* We're at the final level or block translation level */
                if ((desc & 3) == 1 || level == 3)
                        break;

                if (check_output_size(wi, desc)) {
                        out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
                        out->upper_attr = desc;
                        return 1;
                }

                base_addr = desc & GENMASK_ULL(47, wi->pgshift);

                level += 1;
                addr_top = addr_bottom - 1;
        }

        if (level < first_block_level) {
                out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
                out->upper_attr = desc;
                return 1;
        }

        /*
         * We don't use the contiguous bit in the stage-2 ptes, so skip check
         * for misprogramming of the contiguous bit.
         */

        if (check_output_size(wi, desc)) {
                out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
                out->upper_attr = desc;
                return 1;
        }

        if (!(desc & BIT(10))) {
                out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
                out->upper_attr = desc;
                return 1;
        }

        /* Calculate and return the result */
        paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
                (ipa & GENMASK_ULL(addr_bottom - 1, 0));
        out->output = paddr;
        out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
        out->readable = desc & (0b01 << 6);
        out->writable = desc & (0b10 << 6);
        out->level = level;
        out->upper_attr = desc & GENMASK_ULL(63, 52);
        return 0;
}

static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
{
        struct kvm_vcpu *vcpu = data;

        return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
}

static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
{
        wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;

        switch (vtcr & VTCR_EL2_TG0_MASK) {
        case VTCR_EL2_TG0_4K:
                wi->pgshift = 12; break;
        case VTCR_EL2_TG0_16K:
                wi->pgshift = 14; break;
        case VTCR_EL2_TG0_64K:
        default:        /* IMPDEF: treat any other value as 64k */
                wi->pgshift = 16; break;
        }

        wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
        /* Global limit for now, should eventually be per-VM */
        wi->max_oa_bits = min(get_kvm_ipa_limit(),
                              ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr)));
}

int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
                       struct kvm_s2_trans *result)
{
        u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
        struct s2_walk_info wi;
        int ret;

        result->esr = 0;

        if (!vcpu_has_nv(vcpu))
                return 0;

        wi.read_desc = read_guest_s2_desc;
        wi.data = vcpu;
        wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);

        vtcr_to_walk_info(vtcr, &wi);

        wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;

        ret = walk_nested_s2_pgd(gipa, &wi, result);
        if (ret)
                result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);

        return ret;
}

struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu)
{
        struct kvm *kvm = vcpu->kvm;
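Finally, a standalone sketch (not part of the patch) of the descriptor-bit decode at the end of walk_nested_s2_pgd(): S2AP[0] (bit 6) drives readable, S2AP[1] (bit 7) drives writable, a clear access flag (bit 10) is reported as ESR_ELx_FSC_ACCESS, and bits [63:52] are preserved in upper_attr.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: decodes the permission/attribute bits of a stage-2
 * block/page descriptor the same way the walker fills in kvm_s2_trans.
 */
int main(void)
{
        /* Example: a valid level-2 block descriptor (4KB granule),
         * output address 0x80000000, S2AP = 0b11 (RW), AF set. */
        uint64_t desc = 0x80000000ULL   /* output address bits */
                        | (1ULL << 10)  /* AF: access flag */
                        | (3ULL << 6)   /* S2AP[1:0]: read and write */
                        | 0x1;          /* bits [1:0] = 0b01: block descriptor */

        bool readable  = desc & (0x1ULL << 6);    /* S2AP[0] */
        bool writable  = desc & (0x2ULL << 6);    /* S2AP[1] */
        bool accessed  = desc & (1ULL << 10);     /* AF, else FSC_ACCESS fault */
        uint64_t upper = desc & (0xFFFULL << 52); /* bits [63:52] -> upper_attr */

        printf("readable=%d writable=%d AF=%d upper_attr=0x%llx\n",
               readable, writable, accessed, (unsigned long long)upper);
        return 0;
}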