nEPT: Add nEPT violation/misconfiguration support
Inject nEPT fault to L1 guest. This patch is originally from Xinhao. Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> Signed-off-by: Gleb Natapov <gleb@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
53166229e9
commit
25d92081ae
@ -286,6 +286,7 @@ struct kvm_mmu {
|
|||||||
u64 *pae_root;
|
u64 *pae_root;
|
||||||
u64 *lm_root;
|
u64 *lm_root;
|
||||||
u64 rsvd_bits_mask[2][4];
|
u64 rsvd_bits_mask[2][4];
|
||||||
|
u64 bad_mt_xwr;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Bitmap: bit set = last pte in walk
|
* Bitmap: bit set = last pte in walk
|
||||||
@ -512,6 +513,9 @@ struct kvm_vcpu_arch {
|
|||||||
* instruction.
|
* instruction.
|
||||||
*/
|
*/
|
||||||
bool write_fault_to_shadow_pgtable;
|
bool write_fault_to_shadow_pgtable;
|
||||||
|
|
||||||
|
/* set at EPT violation at this point */
|
||||||
|
unsigned long exit_qualification;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kvm_lpage_info {
|
struct kvm_lpage_info {
|
||||||
|
@ -3519,6 +3519,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
|||||||
int maxphyaddr = cpuid_maxphyaddr(vcpu);
|
int maxphyaddr = cpuid_maxphyaddr(vcpu);
|
||||||
u64 exb_bit_rsvd = 0;
|
u64 exb_bit_rsvd = 0;
|
||||||
|
|
||||||
|
context->bad_mt_xwr = 0;
|
||||||
|
|
||||||
if (!context->nx)
|
if (!context->nx)
|
||||||
exb_bit_rsvd = rsvd_bits(63, 63);
|
exb_bit_rsvd = rsvd_bits(63, 63);
|
||||||
switch (context->root_level) {
|
switch (context->root_level) {
|
||||||
@ -3574,7 +3576,40 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
|
static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
|
||||||
|
struct kvm_mmu *context, bool execonly)
|
||||||
|
{
|
||||||
|
int maxphyaddr = cpuid_maxphyaddr(vcpu);
|
||||||
|
int pte;
|
||||||
|
|
||||||
|
context->rsvd_bits_mask[0][3] =
|
||||||
|
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
|
||||||
|
context->rsvd_bits_mask[0][2] =
|
||||||
|
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
|
||||||
|
context->rsvd_bits_mask[0][1] =
|
||||||
|
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
|
||||||
|
context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
|
||||||
|
|
||||||
|
/* large page */
|
||||||
|
context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
|
||||||
|
context->rsvd_bits_mask[1][2] =
|
||||||
|
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
|
||||||
|
context->rsvd_bits_mask[1][1] =
|
||||||
|
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
|
||||||
|
context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
|
||||||
|
|
||||||
|
for (pte = 0; pte < 64; pte++) {
|
||||||
|
int rwx_bits = pte & 7;
|
||||||
|
int mt = pte >> 3;
|
||||||
|
if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
|
||||||
|
rwx_bits == 0x2 || rwx_bits == 0x6 ||
|
||||||
|
(rwx_bits == 0x4 && !execonly))
|
||||||
|
context->bad_mt_xwr |= (1ull << pte);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void update_permission_bitmask(struct kvm_vcpu *vcpu,
|
||||||
|
struct kvm_mmu *mmu, bool ept)
|
||||||
{
|
{
|
||||||
unsigned bit, byte, pfec;
|
unsigned bit, byte, pfec;
|
||||||
u8 map;
|
u8 map;
|
||||||
@ -3592,12 +3627,16 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu
|
|||||||
w = bit & ACC_WRITE_MASK;
|
w = bit & ACC_WRITE_MASK;
|
||||||
u = bit & ACC_USER_MASK;
|
u = bit & ACC_USER_MASK;
|
||||||
|
|
||||||
/* Not really needed: !nx will cause pte.nx to fault */
|
if (!ept) {
|
||||||
x |= !mmu->nx;
|
/* Not really needed: !nx will cause pte.nx to fault */
|
||||||
/* Allow supervisor writes if !cr0.wp */
|
x |= !mmu->nx;
|
||||||
w |= !is_write_protection(vcpu) && !uf;
|
/* Allow supervisor writes if !cr0.wp */
|
||||||
/* Disallow supervisor fetches of user code if cr4.smep */
|
w |= !is_write_protection(vcpu) && !uf;
|
||||||
x &= !(smep && u && !uf);
|
/* Disallow supervisor fetches of user code if cr4.smep */
|
||||||
|
x &= !(smep && u && !uf);
|
||||||
|
} else
|
||||||
|
/* Not really needed: no U/S accesses on ept */
|
||||||
|
u = 1;
|
||||||
|
|
||||||
fault = (ff && !x) || (uf && !u) || (wf && !w);
|
fault = (ff && !x) || (uf && !u) || (wf && !w);
|
||||||
map |= fault << bit;
|
map |= fault << bit;
|
||||||
@ -3632,7 +3671,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
|
|||||||
context->root_level = level;
|
context->root_level = level;
|
||||||
|
|
||||||
reset_rsvds_bits_mask(vcpu, context);
|
reset_rsvds_bits_mask(vcpu, context);
|
||||||
update_permission_bitmask(vcpu, context);
|
update_permission_bitmask(vcpu, context, false);
|
||||||
update_last_pte_bitmap(vcpu, context);
|
update_last_pte_bitmap(vcpu, context);
|
||||||
|
|
||||||
ASSERT(is_pae(vcpu));
|
ASSERT(is_pae(vcpu));
|
||||||
@ -3662,7 +3701,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
|
|||||||
context->root_level = PT32_ROOT_LEVEL;
|
context->root_level = PT32_ROOT_LEVEL;
|
||||||
|
|
||||||
reset_rsvds_bits_mask(vcpu, context);
|
reset_rsvds_bits_mask(vcpu, context);
|
||||||
update_permission_bitmask(vcpu, context);
|
update_permission_bitmask(vcpu, context, false);
|
||||||
update_last_pte_bitmap(vcpu, context);
|
update_last_pte_bitmap(vcpu, context);
|
||||||
|
|
||||||
context->new_cr3 = paging_new_cr3;
|
context->new_cr3 = paging_new_cr3;
|
||||||
@ -3724,7 +3763,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
|
|||||||
context->gva_to_gpa = paging32_gva_to_gpa;
|
context->gva_to_gpa = paging32_gva_to_gpa;
|
||||||
}
|
}
|
||||||
|
|
||||||
update_permission_bitmask(vcpu, context);
|
update_permission_bitmask(vcpu, context, false);
|
||||||
update_last_pte_bitmap(vcpu, context);
|
update_last_pte_bitmap(vcpu, context);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -3803,7 +3842,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
|
|||||||
g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
|
g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
|
||||||
}
|
}
|
||||||
|
|
||||||
update_permission_bitmask(vcpu, g_context);
|
update_permission_bitmask(vcpu, g_context, false);
|
||||||
update_last_pte_bitmap(vcpu, g_context);
|
update_last_pte_bitmap(vcpu, g_context);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -129,10 +129,10 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
|
|||||||
|
|
||||||
static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
|
static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
|
||||||
{
|
{
|
||||||
int bit7;
|
int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;
|
||||||
|
|
||||||
bit7 = (gpte >> 7) & 1;
|
return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) |
|
||||||
return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
|
((mmu->bad_mt_xwr & (1ull << low6)) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int FNAME(is_present_gpte)(unsigned long pte)
|
static inline int FNAME(is_present_gpte)(unsigned long pte)
|
||||||
@ -386,6 +386,25 @@ error:
|
|||||||
walker->fault.vector = PF_VECTOR;
|
walker->fault.vector = PF_VECTOR;
|
||||||
walker->fault.error_code_valid = true;
|
walker->fault.error_code_valid = true;
|
||||||
walker->fault.error_code = errcode;
|
walker->fault.error_code = errcode;
|
||||||
|
|
||||||
|
#if PTTYPE == PTTYPE_EPT
|
||||||
|
/*
|
||||||
|
* Use PFERR_RSVD_MASK in error_code to tell if EPT
|
||||||
|
* misconfiguration requires to be injected. The detection is
|
||||||
|
* done by is_rsvd_bits_set() above.
|
||||||
|
*
|
||||||
|
* We set up the value of exit_qualification to inject:
|
||||||
|
* [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
|
||||||
|
* [5:3] - Calculated by the page walk of the guest EPT page tables
|
||||||
|
* [8:7] - Derived from [8:7] of real exit_qualification
|
||||||
|
*
|
||||||
|
* The other bits are set to 0.
|
||||||
|
*/
|
||||||
|
if (!(errcode & PFERR_RSVD_MASK)) {
|
||||||
|
vcpu->arch.exit_qualification &= 0x187;
|
||||||
|
vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
walker->fault.address = addr;
|
walker->fault.address = addr;
|
||||||
walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
|
walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
|
||||||
|
|
||||||
|
@ -5317,9 +5317,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
|
|||||||
|
|
||||||
/* It is a write fault? */
|
/* It is a write fault? */
|
||||||
error_code = exit_qualification & (1U << 1);
|
error_code = exit_qualification & (1U << 1);
|
||||||
|
/* It is a fetch fault? */
|
||||||
|
error_code |= (exit_qualification & (1U << 2)) << 2;
|
||||||
/* ept page table is present? */
|
/* ept page table is present? */
|
||||||
error_code |= (exit_qualification >> 3) & 0x1;
|
error_code |= (exit_qualification >> 3) & 0x1;
|
||||||
|
|
||||||
|
vcpu->arch.exit_qualification = exit_qualification;
|
||||||
|
|
||||||
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
|
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7348,6 +7352,21 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
|||||||
entry->ecx |= bit(X86_FEATURE_VMX);
|
entry->ecx |= bit(X86_FEATURE_VMX);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
|
||||||
|
struct x86_exception *fault)
|
||||||
|
{
|
||||||
|
struct vmcs12 *vmcs12;
|
||||||
|
nested_vmx_vmexit(vcpu);
|
||||||
|
vmcs12 = get_vmcs12(vcpu);
|
||||||
|
|
||||||
|
if (fault->error_code & PFERR_RSVD_MASK)
|
||||||
|
vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
|
||||||
|
else
|
||||||
|
vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
|
||||||
|
vmcs12->exit_qualification = vcpu->arch.exit_qualification;
|
||||||
|
vmcs12->guest_physical_address = fault->address;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
|
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
|
||||||
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
|
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
|
||||||
|
Loading…
Reference in New Issue
Block a user