Merge branch 'queue' into next
Merge patches queued during the run-up to the merge window.

* queue: (25 commits)
  KVM: Choose better candidate for directed yield
  KVM: Note down when cpu relax intercepted or pause loop exited
  KVM: Add config to support ple or cpu relax optimzation
  KVM: switch to symbolic name for irq_states size
  KVM: x86: Fix typos in pmu.c
  KVM: x86: Fix typos in lapic.c
  KVM: x86: Fix typos in cpuid.c
  KVM: x86: Fix typos in emulate.c
  KVM: x86: Fix typos in x86.c
  KVM: SVM: Fix typos
  KVM: VMX: Fix typos
  KVM: remove the unused parameter of gfn_to_pfn_memslot
  KVM: remove is_error_hpa
  KVM: make bad_pfn static to kvm_main.c
  KVM: using get_fault_pfn to get the fault pfn
  KVM: MMU: track the refcount when unmap the page
  KVM: x86: remove unnecessary mark_page_dirty
  KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()
  KVM: MMU: Push trace_kvm_age_page() into kvm_age_rmapp()
  KVM: MMU: Add memslot parameter to hva handlers
  ...

Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
commit
e9bda6f6f9
@ -52,6 +52,8 @@
|
|||||||
|
|
||||||
struct kvm;
|
struct kvm;
|
||||||
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
|
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
|
||||||
|
extern int kvm_unmap_hva_range(struct kvm *kvm,
|
||||||
|
unsigned long start, unsigned long end);
|
||||||
extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
|
extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
|
||||||
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
||||||
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||||
|
@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|||||||
goto out_put;
|
goto out_put;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
static int kvm_handle_hva_range(struct kvm *kvm,
|
||||||
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
|
unsigned long start,
|
||||||
unsigned long gfn))
|
unsigned long end,
|
||||||
|
int (*handler)(struct kvm *kvm,
|
||||||
|
unsigned long *rmapp,
|
||||||
|
unsigned long gfn))
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
@ -767,15 +770,25 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
|||||||
|
|
||||||
slots = kvm_memslots(kvm);
|
slots = kvm_memslots(kvm);
|
||||||
kvm_for_each_memslot(memslot, slots) {
|
kvm_for_each_memslot(memslot, slots) {
|
||||||
unsigned long start = memslot->userspace_addr;
|
unsigned long hva_start, hva_end;
|
||||||
unsigned long end;
|
gfn_t gfn, gfn_end;
|
||||||
|
|
||||||
end = start + (memslot->npages << PAGE_SHIFT);
|
hva_start = max(start, memslot->userspace_addr);
|
||||||
if (hva >= start && hva < end) {
|
hva_end = min(end, memslot->userspace_addr +
|
||||||
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
|
(memslot->npages << PAGE_SHIFT));
|
||||||
|
if (hva_start >= hva_end)
|
||||||
|
continue;
|
||||||
|
/*
|
||||||
|
* {gfn(page) | page intersects with [hva_start, hva_end)} =
|
||||||
|
* {gfn, gfn+1, ..., gfn_end-1}.
|
||||||
|
*/
|
||||||
|
gfn = hva_to_gfn_memslot(hva_start, memslot);
|
||||||
|
gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
|
||||||
|
|
||||||
ret = handler(kvm, &memslot->rmap[gfn_offset],
|
for (; gfn < gfn_end; ++gfn) {
|
||||||
memslot->base_gfn + gfn_offset);
|
gfn_t gfn_offset = gfn - memslot->base_gfn;
|
||||||
|
|
||||||
|
ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
|
||||||
retval |= ret;
|
retval |= ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
|||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
||||||
|
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
|
||||||
|
unsigned long gfn))
|
||||||
|
{
|
||||||
|
return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
|
||||||
|
}
|
||||||
|
|
||||||
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||||
unsigned long gfn)
|
unsigned long gfn)
|
||||||
{
|
{
|
||||||
@ -850,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||||
|
{
|
||||||
|
if (kvm->arch.using_mmu_notifiers)
|
||||||
|
kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||||
unsigned long gfn)
|
unsigned long gfn)
|
||||||
{
|
{
|
||||||
|
@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
|
|||||||
|
|
||||||
if (likely(!pfnmap)) {
|
if (likely(!pfnmap)) {
|
||||||
unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
|
unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
|
||||||
pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
|
pfn = gfn_to_pfn_memslot(slot, gfn);
|
||||||
if (is_error_pfn(pfn)) {
|
if (is_error_pfn(pfn)) {
|
||||||
printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
|
printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
|
||||||
(long)gfn);
|
(long)gfn);
|
||||||
|
@ -21,6 +21,7 @@ config KVM
|
|||||||
depends on HAVE_KVM && EXPERIMENTAL
|
depends on HAVE_KVM && EXPERIMENTAL
|
||||||
select PREEMPT_NOTIFIERS
|
select PREEMPT_NOTIFIERS
|
||||||
select ANON_INODES
|
select ANON_INODES
|
||||||
|
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||||
---help---
|
---help---
|
||||||
Support hosting paravirtualized guest machines using the SIE
|
Support hosting paravirtualized guest machines using the SIE
|
||||||
virtualization capability on the mainframe. This should work
|
virtualization capability on the mainframe. This should work
|
||||||
|
@ -500,11 +500,11 @@ struct kvm_vcpu_arch {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct kvm_lpage_info {
|
struct kvm_lpage_info {
|
||||||
unsigned long rmap_pde;
|
|
||||||
int write_count;
|
int write_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kvm_arch_memory_slot {
|
struct kvm_arch_memory_slot {
|
||||||
|
unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
|
||||||
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
|
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -957,6 +957,7 @@ extern bool kvm_rebooting;
|
|||||||
|
|
||||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||||
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
|
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
|
||||||
|
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||||
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
|
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
|
||||||
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
||||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||||
|
@ -37,6 +37,7 @@ config KVM
|
|||||||
select TASK_DELAY_ACCT
|
select TASK_DELAY_ACCT
|
||||||
select PERF_EVENTS
|
select PERF_EVENTS
|
||||||
select HAVE_KVM_MSI
|
select HAVE_KVM_MSI
|
||||||
|
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||||
---help---
|
---help---
|
||||||
Support hosting fully virtualized guest machines using hardware
|
Support hosting fully virtualized guest machines using hardware
|
||||||
virtualization extensions. You will need a fairly recent
|
virtualization extensions. You will need a fairly recent
|
||||||
|
@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
|
|||||||
}
|
}
|
||||||
case 7: {
|
case 7: {
|
||||||
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||||
/* Mask ebx against host capbability word 9 */
|
/* Mask ebx against host capability word 9 */
|
||||||
if (index == 0) {
|
if (index == 0) {
|
||||||
entry->ebx &= kvm_supported_word9_x86_features;
|
entry->ebx &= kvm_supported_word9_x86_features;
|
||||||
cpuid_mask(&entry->ebx, 9);
|
cpuid_mask(&entry->ebx, 9);
|
||||||
|
@ -642,7 +642,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
|
|||||||
if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
|
if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
|
||||||
goto bad;
|
goto bad;
|
||||||
} else {
|
} else {
|
||||||
/* exapand-down segment */
|
/* expand-down segment */
|
||||||
if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
|
if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
|
||||||
goto bad;
|
goto bad;
|
||||||
lim = desc.d ? 0xffffffff : 0xffff;
|
lim = desc.d ? 0xffffffff : 0xffff;
|
||||||
@ -1383,7 +1383,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
|
|||||||
err_code = selector & 0xfffc;
|
err_code = selector & 0xfffc;
|
||||||
err_vec = GP_VECTOR;
|
err_vec = GP_VECTOR;
|
||||||
|
|
||||||
/* can't load system descriptor into segment selecor */
|
/* can't load system descriptor into segment selector */
|
||||||
if (seg <= VCPU_SREG_GS && !seg_desc.s)
|
if (seg <= VCPU_SREG_GS && !seg_desc.s)
|
||||||
goto exception;
|
goto exception;
|
||||||
|
|
||||||
@ -2398,7 +2398,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
|
|||||||
set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
|
set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now load segment descriptors. If fault happenes at this stage
|
* Now load segment descriptors. If fault happens at this stage
|
||||||
* it is handled in a context of new task
|
* it is handled in a context of new task
|
||||||
*/
|
*/
|
||||||
ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
|
ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
|
||||||
@ -2640,7 +2640,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
|
|||||||
*
|
*
|
||||||
* 1. jmp/call/int to task gate: Check against DPL of the task gate
|
* 1. jmp/call/int to task gate: Check against DPL of the task gate
|
||||||
* 2. Exception/IRQ/iret: No check is performed
|
* 2. Exception/IRQ/iret: No check is performed
|
||||||
* 3. jmp/call to TSS: Check agains DPL of the TSS
|
* 3. jmp/call to TSS: Check against DPL of the TSS
|
||||||
*/
|
*/
|
||||||
if (reason == TASK_SWITCH_GATE) {
|
if (reason == TASK_SWITCH_GATE) {
|
||||||
if (idt_index != -1) {
|
if (idt_index != -1) {
|
||||||
@ -2681,7 +2681,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
|
|||||||
ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
|
ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
|
||||||
|
|
||||||
/* set back link to prev task only if NT bit is set in eflags
|
/* set back link to prev task only if NT bit is set in eflags
|
||||||
note that old_tss_sel is not used afetr this point */
|
note that old_tss_sel is not used after this point */
|
||||||
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
|
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
|
||||||
old_tss_sel = 0xffff;
|
old_tss_sel = 0xffff;
|
||||||
|
|
||||||
|
@ -70,7 +70,7 @@ struct kvm_pic {
|
|||||||
struct kvm_io_device dev_slave;
|
struct kvm_io_device dev_slave;
|
||||||
struct kvm_io_device dev_eclr;
|
struct kvm_io_device dev_eclr;
|
||||||
void (*ack_notifier)(void *opaque, int irq);
|
void (*ack_notifier)(void *opaque, int irq);
|
||||||
unsigned long irq_states[16];
|
unsigned long irq_states[PIC_NUM_PINS];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
|
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
|
||||||
|
@ -719,7 +719,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
|
|||||||
{
|
{
|
||||||
unsigned char alignment = offset & 0xf;
|
unsigned char alignment = offset & 0xf;
|
||||||
u32 result;
|
u32 result;
|
||||||
/* this bitmask has a bit cleared for each reserver register */
|
/* this bitmask has a bit cleared for each reserved register */
|
||||||
static const u64 rmask = 0x43ff01ffffffe70cULL;
|
static const u64 rmask = 0x43ff01ffffffe70cULL;
|
||||||
|
|
||||||
if ((alignment + len) > 4) {
|
if ((alignment + len) > 4) {
|
||||||
@ -792,7 +792,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
|
|||||||
atomic_set(&apic->lapic_timer.pending, 0);
|
atomic_set(&apic->lapic_timer.pending, 0);
|
||||||
|
|
||||||
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
|
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
|
||||||
/* lapic timer in oneshot or peroidic mode */
|
/* lapic timer in oneshot or periodic mode */
|
||||||
now = apic->lapic_timer.timer.base->get_time();
|
now = apic->lapic_timer.timer.base->get_time();
|
||||||
apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
|
apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
|
||||||
* APIC_BUS_CYCLE_NS * apic->divide_count;
|
* APIC_BUS_CYCLE_NS * apic->divide_count;
|
||||||
|
@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
pfn = spte_to_pfn(old_spte);
|
pfn = spte_to_pfn(old_spte);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* KVM does not hold the refcount of the page used by
|
||||||
|
* kvm mmu, before reclaiming the page, we should
|
||||||
|
* unmap it from mmu first.
|
||||||
|
*/
|
||||||
|
WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
|
||||||
|
|
||||||
if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
|
if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
|
||||||
kvm_set_pfn_accessed(pfn);
|
kvm_set_pfn_accessed(pfn);
|
||||||
if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
|
if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
|
||||||
@ -960,13 +968,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
|
|||||||
static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
|
static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
|
||||||
struct kvm_memory_slot *slot)
|
struct kvm_memory_slot *slot)
|
||||||
{
|
{
|
||||||
struct kvm_lpage_info *linfo;
|
unsigned long idx;
|
||||||
|
|
||||||
if (likely(level == PT_PAGE_TABLE_LEVEL))
|
if (likely(level == PT_PAGE_TABLE_LEVEL))
|
||||||
return &slot->rmap[gfn - slot->base_gfn];
|
return &slot->rmap[gfn - slot->base_gfn];
|
||||||
|
|
||||||
linfo = lpage_info_slot(gfn, slot, level);
|
idx = gfn_to_index(gfn, slot->base_gfn, level);
|
||||||
return &linfo->rmap_pde;
|
return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1200,7 +1208,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||||
unsigned long data)
|
struct kvm_memory_slot *slot, unsigned long data)
|
||||||
{
|
{
|
||||||
u64 *sptep;
|
u64 *sptep;
|
||||||
struct rmap_iterator iter;
|
struct rmap_iterator iter;
|
||||||
@ -1218,7 +1226,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||||
unsigned long data)
|
struct kvm_memory_slot *slot, unsigned long data)
|
||||||
{
|
{
|
||||||
u64 *sptep;
|
u64 *sptep;
|
||||||
struct rmap_iterator iter;
|
struct rmap_iterator iter;
|
||||||
@ -1259,43 +1267,67 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
static int kvm_handle_hva_range(struct kvm *kvm,
|
||||||
unsigned long data,
|
unsigned long start,
|
||||||
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
|
unsigned long end,
|
||||||
unsigned long data))
|
unsigned long data,
|
||||||
|
int (*handler)(struct kvm *kvm,
|
||||||
|
unsigned long *rmapp,
|
||||||
|
struct kvm_memory_slot *slot,
|
||||||
|
unsigned long data))
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
int ret;
|
int ret = 0;
|
||||||
int retval = 0;
|
|
||||||
struct kvm_memslots *slots;
|
struct kvm_memslots *slots;
|
||||||
struct kvm_memory_slot *memslot;
|
struct kvm_memory_slot *memslot;
|
||||||
|
|
||||||
slots = kvm_memslots(kvm);
|
slots = kvm_memslots(kvm);
|
||||||
|
|
||||||
kvm_for_each_memslot(memslot, slots) {
|
kvm_for_each_memslot(memslot, slots) {
|
||||||
unsigned long start = memslot->userspace_addr;
|
unsigned long hva_start, hva_end;
|
||||||
unsigned long end;
|
gfn_t gfn_start, gfn_end;
|
||||||
|
|
||||||
end = start + (memslot->npages << PAGE_SHIFT);
|
hva_start = max(start, memslot->userspace_addr);
|
||||||
if (hva >= start && hva < end) {
|
hva_end = min(end, memslot->userspace_addr +
|
||||||
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
|
(memslot->npages << PAGE_SHIFT));
|
||||||
gfn_t gfn = memslot->base_gfn + gfn_offset;
|
if (hva_start >= hva_end)
|
||||||
|
continue;
|
||||||
|
/*
|
||||||
|
* {gfn(page) | page intersects with [hva_start, hva_end)} =
|
||||||
|
* {gfn_start, gfn_start+1, ..., gfn_end-1}.
|
||||||
|
*/
|
||||||
|
gfn_start = hva_to_gfn_memslot(hva_start, memslot);
|
||||||
|
gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
|
||||||
|
|
||||||
ret = handler(kvm, &memslot->rmap[gfn_offset], data);
|
for (j = PT_PAGE_TABLE_LEVEL;
|
||||||
|
j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
|
||||||
|
unsigned long idx, idx_end;
|
||||||
|
unsigned long *rmapp;
|
||||||
|
|
||||||
for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
|
/*
|
||||||
struct kvm_lpage_info *linfo;
|
* {idx(page_j) | page_j intersects with
|
||||||
|
* [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}.
|
||||||
|
*/
|
||||||
|
idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
|
||||||
|
idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
|
||||||
|
|
||||||
linfo = lpage_info_slot(gfn, memslot,
|
rmapp = __gfn_to_rmap(gfn_start, j, memslot);
|
||||||
PT_DIRECTORY_LEVEL + j);
|
|
||||||
ret |= handler(kvm, &linfo->rmap_pde, data);
|
for (; idx <= idx_end; ++idx)
|
||||||
}
|
ret |= handler(kvm, rmapp++, memslot, data);
|
||||||
trace_kvm_age_page(hva, memslot, ret);
|
|
||||||
retval |= ret;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return retval;
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
|
||||||
|
unsigned long data,
|
||||||
|
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
|
||||||
|
struct kvm_memory_slot *slot,
|
||||||
|
unsigned long data))
|
||||||
|
{
|
||||||
|
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
||||||
@ -1303,13 +1335,18 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
|||||||
return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
|
return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||||
|
{
|
||||||
|
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
|
||||||
|
}
|
||||||
|
|
||||||
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||||
{
|
{
|
||||||
kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
|
kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||||
unsigned long data)
|
struct kvm_memory_slot *slot, unsigned long data)
|
||||||
{
|
{
|
||||||
u64 *sptep;
|
u64 *sptep;
|
||||||
struct rmap_iterator uninitialized_var(iter);
|
struct rmap_iterator uninitialized_var(iter);
|
||||||
@ -1323,8 +1360,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
|||||||
* This has some overhead, but not as much as the cost of swapping
|
* This has some overhead, but not as much as the cost of swapping
|
||||||
* out actively used pages or breaking up actively used hugepages.
|
* out actively used pages or breaking up actively used hugepages.
|
||||||
*/
|
*/
|
||||||
if (!shadow_accessed_mask)
|
if (!shadow_accessed_mask) {
|
||||||
return kvm_unmap_rmapp(kvm, rmapp, data);
|
young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
|
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
|
||||||
sptep = rmap_get_next(&iter)) {
|
sptep = rmap_get_next(&iter)) {
|
||||||
@ -1336,12 +1375,14 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
|||||||
(unsigned long *)sptep);
|
(unsigned long *)sptep);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
out:
|
||||||
|
/* @data has hva passed to kvm_age_hva(). */
|
||||||
|
trace_kvm_age_page(data, slot, young);
|
||||||
return young;
|
return young;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
|
||||||
unsigned long data)
|
struct kvm_memory_slot *slot, unsigned long data)
|
||||||
{
|
{
|
||||||
u64 *sptep;
|
u64 *sptep;
|
||||||
struct rmap_iterator iter;
|
struct rmap_iterator iter;
|
||||||
@ -1379,13 +1420,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
|
|||||||
|
|
||||||
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
|
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
|
||||||
|
|
||||||
kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
|
kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
|
||||||
kvm_flush_remote_tlbs(vcpu->kvm);
|
kvm_flush_remote_tlbs(vcpu->kvm);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
|
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
|
||||||
{
|
{
|
||||||
return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
|
return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
|
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
|
||||||
@ -2472,14 +2513,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
|
|||||||
unsigned long hva;
|
unsigned long hva;
|
||||||
|
|
||||||
slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
|
slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
|
||||||
if (!slot) {
|
if (!slot)
|
||||||
get_page(fault_page);
|
return get_fault_pfn();
|
||||||
return page_to_pfn(fault_page);
|
|
||||||
}
|
|
||||||
|
|
||||||
hva = gfn_to_hva_memslot(slot, gfn);
|
hva = gfn_to_hva_memslot(slot, gfn);
|
||||||
|
|
||||||
return hva_to_pfn_atomic(vcpu->kvm, hva);
|
return hva_to_pfn_atomic(hva);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
|
static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Kernel-based Virtual Machine -- Performane Monitoring Unit support
|
* Kernel-based Virtual Machine -- Performance Monitoring Unit support
|
||||||
*
|
*
|
||||||
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
|
* Copyright 2011 Red Hat, Inc. and/or its affiliates.
|
||||||
*
|
*
|
||||||
|
@ -2063,7 +2063,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
|
|||||||
if (svm->nested.intercept & 1ULL) {
|
if (svm->nested.intercept & 1ULL) {
|
||||||
/*
|
/*
|
||||||
* The #vmexit can't be emulated here directly because this
|
* The #vmexit can't be emulated here directly because this
|
||||||
* code path runs with irqs and preemtion disabled. A
|
* code path runs with irqs and preemption disabled. A
|
||||||
* #vmexit emulation might sleep. Only signal request for
|
* #vmexit emulation might sleep. Only signal request for
|
||||||
* the #vmexit here.
|
* the #vmexit here.
|
||||||
*/
|
*/
|
||||||
@ -2409,7 +2409,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
|
|||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* This function merges the msr permission bitmaps of kvm and the
|
* This function merges the msr permission bitmaps of kvm and the
|
||||||
* nested vmcb. It is omptimized in that it only merges the parts where
|
* nested vmcb. It is optimized in that it only merges the parts where
|
||||||
* the kvm msr permission bitmap may contain zero bits
|
* the kvm msr permission bitmap may contain zero bits
|
||||||
*/
|
*/
|
||||||
int i;
|
int i;
|
||||||
|
@ -1343,7 +1343,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
|
|||||||
guest_efer = vmx->vcpu.arch.efer;
|
guest_efer = vmx->vcpu.arch.efer;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NX is emulated; LMA and LME handled by hardware; SCE meaninless
|
* NX is emulated; LMA and LME handled by hardware; SCE meaningless
|
||||||
* outside long mode
|
* outside long mode
|
||||||
*/
|
*/
|
||||||
ignore_bits = EFER_NX | EFER_SCE;
|
ignore_bits = EFER_NX | EFER_SCE;
|
||||||
@ -3261,7 +3261,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
|
|||||||
* qemu binaries.
|
* qemu binaries.
|
||||||
* IA32 arch specifies that at the time of processor reset the
|
* IA32 arch specifies that at the time of processor reset the
|
||||||
* "Accessed" bit in the AR field of segment registers is 1. And qemu
|
* "Accessed" bit in the AR field of segment registers is 1. And qemu
|
||||||
* is setting it to 0 in the usedland code. This causes invalid guest
|
* is setting it to 0 in the userland code. This causes invalid guest
|
||||||
* state vmexit when "unrestricted guest" mode is turned on.
|
* state vmexit when "unrestricted guest" mode is turned on.
|
||||||
* Fix for this setup issue in cpu_reset is being pushed in the qemu
|
* Fix for this setup issue in cpu_reset is being pushed in the qemu
|
||||||
* tree. Newer qemu binaries with that qemu fix would not need this
|
* tree. Newer qemu binaries with that qemu fix would not need this
|
||||||
@ -4446,7 +4446,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
|
|||||||
hypercall[2] = 0xc1;
|
hypercall[2] = 0xc1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* called to set cr0 as approriate for a mov-to-cr0 exit. */
|
/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
|
||||||
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
|
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
|
||||||
{
|
{
|
||||||
if (to_vmx(vcpu)->nested.vmxon &&
|
if (to_vmx(vcpu)->nested.vmxon &&
|
||||||
|
@ -1093,7 +1093,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
|
|||||||
* For each generation, we track the original measured
|
* For each generation, we track the original measured
|
||||||
* nanosecond time, offset, and write, so if TSCs are in
|
* nanosecond time, offset, and write, so if TSCs are in
|
||||||
* sync, we can match exact offset, and if not, we can match
|
* sync, we can match exact offset, and if not, we can match
|
||||||
* exact software computaion in compute_guest_tsc()
|
* exact software computation in compute_guest_tsc()
|
||||||
*
|
*
|
||||||
* These values are tracked in kvm->arch.cur_xxx variables.
|
* These values are tracked in kvm->arch.cur_xxx variables.
|
||||||
*/
|
*/
|
||||||
@ -1500,7 +1500,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
|
|||||||
{
|
{
|
||||||
gpa_t gpa = data & ~0x3f;
|
gpa_t gpa = data & ~0x3f;
|
||||||
|
|
||||||
/* Bits 2:5 are resrved, Should be zero */
|
/* Bits 2:5 are reserved, Should be zero */
|
||||||
if (data & 0x3c)
|
if (data & 0x3c)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@ -1723,7 +1723,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
|
|||||||
* Ignore all writes to this no longer documented MSR.
|
* Ignore all writes to this no longer documented MSR.
|
||||||
* Writes are only relevant for old K7 processors,
|
* Writes are only relevant for old K7 processors,
|
||||||
* all pre-dating SVM, but a recommended workaround from
|
* all pre-dating SVM, but a recommended workaround from
|
||||||
* AMD for these chips. It is possible to speicify the
|
* AMD for these chips. It is possible to specify the
|
||||||
* affected processor models on the command line, hence
|
* affected processor models on the command line, hence
|
||||||
* the need to ignore the workaround.
|
* the need to ignore the workaround.
|
||||||
*/
|
*/
|
||||||
@ -2632,7 +2632,6 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
|
|||||||
if (!vcpu->arch.time_page)
|
if (!vcpu->arch.time_page)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
src->flags |= PVCLOCK_GUEST_STOPPED;
|
src->flags |= PVCLOCK_GUEST_STOPPED;
|
||||||
mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
|
|
||||||
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -4492,7 +4491,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* if emulation was due to access to shadowed page table
|
* if emulation was due to access to shadowed page table
|
||||||
* and it failed try to unshadow page and re-entetr the
|
* and it failed try to unshadow page and re-enter the
|
||||||
* guest to let CPU execute the instruction.
|
* guest to let CPU execute the instruction.
|
||||||
*/
|
*/
|
||||||
if (kvm_mmu_unprotect_page_virt(vcpu, gva))
|
if (kvm_mmu_unprotect_page_virt(vcpu, gva))
|
||||||
@ -5588,7 +5587,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
|||||||
/*
|
/*
|
||||||
* We are here if userspace calls get_regs() in the middle of
|
* We are here if userspace calls get_regs() in the middle of
|
||||||
* instruction emulation. Registers state needs to be copied
|
* instruction emulation. Registers state needs to be copied
|
||||||
* back from emulation context to vcpu. Usrapace shouldn't do
|
* back from emulation context to vcpu. Userspace shouldn't do
|
||||||
* that usually, but some bad designed PV devices (vmware
|
* that usually, but some bad designed PV devices (vmware
|
||||||
* backdoor interface) need this to work
|
* backdoor interface) need this to work
|
||||||
*/
|
*/
|
||||||
@ -6117,7 +6116,7 @@ int kvm_arch_hardware_enable(void *garbage)
|
|||||||
* as we reset last_host_tsc on all VCPUs to stop this from being
|
* as we reset last_host_tsc on all VCPUs to stop this from being
|
||||||
* called multiple times (one for each physical CPU bringup).
|
* called multiple times (one for each physical CPU bringup).
|
||||||
*
|
*
|
||||||
* Platforms with unnreliable TSCs don't have to deal with this, they
|
* Platforms with unreliable TSCs don't have to deal with this, they
|
||||||
* will be compensated by the logic in vcpu_load, which sets the TSC to
|
* will be compensated by the logic in vcpu_load, which sets the TSC to
|
||||||
* catchup mode. This will catchup all VCPUs to real time, but cannot
|
* catchup mode. This will catchup all VCPUs to real time, but cannot
|
||||||
* guarantee that they stay in perfect synchronization.
|
* guarantee that they stay in perfect synchronization.
|
||||||
@ -6314,6 +6313,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
||||||
|
if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
|
||||||
|
kvm_kvfree(free->arch.rmap_pde[i]);
|
||||||
|
free->arch.rmap_pde[i] = NULL;
|
||||||
|
}
|
||||||
if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
|
if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
|
||||||
kvm_kvfree(free->arch.lpage_info[i]);
|
kvm_kvfree(free->arch.lpage_info[i]);
|
||||||
free->arch.lpage_info[i] = NULL;
|
free->arch.lpage_info[i] = NULL;
|
||||||
@ -6333,6 +6336,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
|
|||||||
lpages = gfn_to_index(slot->base_gfn + npages - 1,
|
lpages = gfn_to_index(slot->base_gfn + npages - 1,
|
||||||
slot->base_gfn, level) + 1;
|
slot->base_gfn, level) + 1;
|
||||||
|
|
||||||
|
slot->arch.rmap_pde[i] =
|
||||||
|
kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
|
||||||
|
if (!slot->arch.rmap_pde[i])
|
||||||
|
goto out_free;
|
||||||
|
|
||||||
slot->arch.lpage_info[i] =
|
slot->arch.lpage_info[i] =
|
||||||
kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
|
kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
|
||||||
if (!slot->arch.lpage_info[i])
|
if (!slot->arch.lpage_info[i])
|
||||||
@ -6361,7 +6369,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
|
|||||||
|
|
||||||
out_free:
|
out_free:
|
||||||
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
|
||||||
|
kvm_kvfree(slot->arch.rmap_pde[i]);
|
||||||
kvm_kvfree(slot->arch.lpage_info[i]);
|
kvm_kvfree(slot->arch.lpage_info[i]);
|
||||||
|
slot->arch.rmap_pde[i] = NULL;
|
||||||
slot->arch.lpage_info[i] = NULL;
|
slot->arch.lpage_info[i] = NULL;
|
||||||
}
|
}
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
@ -6381,7 +6391,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||||||
map_flags = MAP_SHARED | MAP_ANONYMOUS;
|
map_flags = MAP_SHARED | MAP_ANONYMOUS;
|
||||||
|
|
||||||
/*To keep backward compatibility with older userspace,
|
/*To keep backward compatibility with older userspace,
|
||||||
*x86 needs to hanlde !user_alloc case.
|
*x86 needs to handle !user_alloc case.
|
||||||
*/
|
*/
|
||||||
if (!user_alloc) {
|
if (!user_alloc) {
|
||||||
if (npages && !old.rmap) {
|
if (npages && !old.rmap) {
|
||||||
|
@ -183,6 +183,18 @@ struct kvm_vcpu {
|
|||||||
} async_pf;
|
} async_pf;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||||
|
/*
|
||||||
|
* Cpu relax intercept or pause loop exit optimization
|
||||||
|
* in_spin_loop: set when a vcpu does a pause loop exit
|
||||||
|
* or cpu relax intercepted.
|
||||||
|
* dy_eligible: indicates whether vcpu is eligible for directed yield.
|
||||||
|
*/
|
||||||
|
struct {
|
||||||
|
bool in_spin_loop;
|
||||||
|
bool dy_eligible;
|
||||||
|
} spin_loop;
|
||||||
|
#endif
|
||||||
struct kvm_vcpu_arch arch;
|
struct kvm_vcpu_arch arch;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -378,20 +390,11 @@ id_to_memslot(struct kvm_memslots *slots, int id)
|
|||||||
return slot;
|
return slot;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
|
|
||||||
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
|
|
||||||
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
|
|
||||||
|
|
||||||
extern struct page *bad_page;
|
extern struct page *bad_page;
|
||||||
extern struct page *fault_page;
|
|
||||||
|
|
||||||
extern pfn_t bad_pfn;
|
|
||||||
extern pfn_t fault_pfn;
|
|
||||||
|
|
||||||
int is_error_page(struct page *page);
|
int is_error_page(struct page *page);
|
||||||
int is_error_pfn(pfn_t pfn);
|
int is_error_pfn(pfn_t pfn);
|
||||||
int is_hwpoison_pfn(pfn_t pfn);
|
int is_hwpoison_pfn(pfn_t pfn);
|
||||||
int is_fault_pfn(pfn_t pfn);
|
|
||||||
int is_noslot_pfn(pfn_t pfn);
|
int is_noslot_pfn(pfn_t pfn);
|
||||||
int is_invalid_pfn(pfn_t pfn);
|
int is_invalid_pfn(pfn_t pfn);
|
||||||
int kvm_is_error_hva(unsigned long addr);
|
int kvm_is_error_hva(unsigned long addr);
|
||||||
@ -427,20 +430,20 @@ void kvm_release_page_dirty(struct page *page);
|
|||||||
void kvm_set_page_dirty(struct page *page);
|
void kvm_set_page_dirty(struct page *page);
|
||||||
void kvm_set_page_accessed(struct page *page);
|
void kvm_set_page_accessed(struct page *page);
|
||||||
|
|
||||||
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
|
pfn_t hva_to_pfn_atomic(unsigned long addr);
|
||||||
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
|
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
|
||||||
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
|
pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
|
||||||
bool write_fault, bool *writable);
|
bool write_fault, bool *writable);
|
||||||
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
|
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
|
||||||
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
|
pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
|
||||||
bool *writable);
|
bool *writable);
|
||||||
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
|
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
|
||||||
struct kvm_memory_slot *slot, gfn_t gfn);
|
|
||||||
void kvm_release_pfn_dirty(pfn_t);
|
void kvm_release_pfn_dirty(pfn_t);
|
||||||
void kvm_release_pfn_clean(pfn_t pfn);
|
void kvm_release_pfn_clean(pfn_t pfn);
|
||||||
void kvm_set_pfn_dirty(pfn_t pfn);
|
void kvm_set_pfn_dirty(pfn_t pfn);
|
||||||
void kvm_set_pfn_accessed(pfn_t pfn);
|
void kvm_set_pfn_accessed(pfn_t pfn);
|
||||||
void kvm_get_pfn(pfn_t pfn);
|
void kvm_get_pfn(pfn_t pfn);
|
||||||
|
pfn_t get_fault_pfn(void);
|
||||||
|
|
||||||
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
|
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
|
||||||
int len);
|
int len);
|
||||||
@ -740,6 +743,14 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
|
|||||||
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
|
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline gfn_t
|
||||||
|
hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
|
||||||
|
{
|
||||||
|
gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;
|
||||||
|
|
||||||
|
return slot->base_gfn + gfn_offset;
|
||||||
|
}
|
||||||
|
|
||||||
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
|
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
|
||||||
gfn_t gfn)
|
gfn_t gfn)
|
||||||
{
|
{
|
||||||
@ -899,5 +910,32 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||||
|
|
||||||
|
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
|
||||||
|
{
|
||||||
|
vcpu->spin_loop.in_spin_loop = val;
|
||||||
|
}
|
||||||
|
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
|
||||||
|
{
|
||||||
|
vcpu->spin_loop.dy_eligible = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
|
||||||
|
|
||||||
|
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -21,3 +21,6 @@ config KVM_ASYNC_PF
|
|||||||
|
|
||||||
config HAVE_KVM_MSI
|
config HAVE_KVM_MSI
|
||||||
bool
|
bool
|
||||||
|
|
||||||
|
config HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||||
|
bool
|
||||||
|
@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
|
|||||||
static void kvm_iommu_put_pages(struct kvm *kvm,
|
static void kvm_iommu_put_pages(struct kvm *kvm,
|
||||||
gfn_t base_gfn, unsigned long npages);
|
gfn_t base_gfn, unsigned long npages);
|
||||||
|
|
||||||
static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
|
static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
|
||||||
gfn_t gfn, unsigned long size)
|
unsigned long size)
|
||||||
{
|
{
|
||||||
gfn_t end_gfn;
|
gfn_t end_gfn;
|
||||||
pfn_t pfn;
|
pfn_t pfn;
|
||||||
|
|
||||||
pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
|
pfn = gfn_to_pfn_memslot(slot, gfn);
|
||||||
end_gfn = gfn + (size >> PAGE_SHIFT);
|
end_gfn = gfn + (size >> PAGE_SHIFT);
|
||||||
gfn += 1;
|
gfn += 1;
|
||||||
|
|
||||||
@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
|
|||||||
return pfn;
|
return pfn;
|
||||||
|
|
||||||
while (gfn < end_gfn)
|
while (gfn < end_gfn)
|
||||||
gfn_to_pfn_memslot(kvm, slot, gfn++);
|
gfn_to_pfn_memslot(slot, gfn++);
|
||||||
|
|
||||||
return pfn;
|
return pfn;
|
||||||
}
|
}
|
||||||
@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
|
|||||||
* Pin all pages we are about to map in memory. This is
|
* Pin all pages we are about to map in memory. This is
|
||||||
* important because we unmap and unpin in 4kb steps later.
|
* important because we unmap and unpin in 4kb steps later.
|
||||||
*/
|
*/
|
||||||
pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
|
pfn = kvm_pin_pages(slot, gfn, page_size);
|
||||||
if (is_error_pfn(pfn)) {
|
if (is_error_pfn(pfn)) {
|
||||||
gfn += 1;
|
gfn += 1;
|
||||||
continue;
|
continue;
|
||||||
|
@ -321,11 +321,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
|
|||||||
switch (ue->u.irqchip.irqchip) {
|
switch (ue->u.irqchip.irqchip) {
|
||||||
case KVM_IRQCHIP_PIC_MASTER:
|
case KVM_IRQCHIP_PIC_MASTER:
|
||||||
e->set = kvm_set_pic_irq;
|
e->set = kvm_set_pic_irq;
|
||||||
max_pin = 16;
|
max_pin = PIC_NUM_PINS;
|
||||||
break;
|
break;
|
||||||
case KVM_IRQCHIP_PIC_SLAVE:
|
case KVM_IRQCHIP_PIC_SLAVE:
|
||||||
e->set = kvm_set_pic_irq;
|
e->set = kvm_set_pic_irq;
|
||||||
max_pin = 16;
|
max_pin = PIC_NUM_PINS;
|
||||||
delta = 8;
|
delta = 8;
|
||||||
break;
|
break;
|
||||||
case KVM_IRQCHIP_IOAPIC:
|
case KVM_IRQCHIP_IOAPIC:
|
||||||
|
@ -100,11 +100,14 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
|
|||||||
|
|
||||||
static bool largepages_enabled = true;
|
static bool largepages_enabled = true;
|
||||||
|
|
||||||
|
struct page *bad_page;
|
||||||
|
static pfn_t bad_pfn;
|
||||||
|
|
||||||
static struct page *hwpoison_page;
|
static struct page *hwpoison_page;
|
||||||
static pfn_t hwpoison_pfn;
|
static pfn_t hwpoison_pfn;
|
||||||
|
|
||||||
struct page *fault_page;
|
static struct page *fault_page;
|
||||||
pfn_t fault_pfn;
|
static pfn_t fault_pfn;
|
||||||
|
|
||||||
inline int kvm_is_mmio_pfn(pfn_t pfn)
|
inline int kvm_is_mmio_pfn(pfn_t pfn)
|
||||||
{
|
{
|
||||||
@ -236,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
|
|||||||
}
|
}
|
||||||
vcpu->run = page_address(page);
|
vcpu->run = page_address(page);
|
||||||
|
|
||||||
|
kvm_vcpu_set_in_spin_loop(vcpu, false);
|
||||||
|
kvm_vcpu_set_dy_eligible(vcpu, false);
|
||||||
|
|
||||||
r = kvm_arch_vcpu_init(vcpu);
|
r = kvm_arch_vcpu_init(vcpu);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto fail_free_run;
|
goto fail_free_run;
|
||||||
@ -332,8 +338,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
|
|||||||
* count is also read inside the mmu_lock critical section.
|
* count is also read inside the mmu_lock critical section.
|
||||||
*/
|
*/
|
||||||
kvm->mmu_notifier_count++;
|
kvm->mmu_notifier_count++;
|
||||||
for (; start < end; start += PAGE_SIZE)
|
need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
|
||||||
need_tlb_flush |= kvm_unmap_hva(kvm, start);
|
|
||||||
need_tlb_flush |= kvm->tlbs_dirty;
|
need_tlb_flush |= kvm->tlbs_dirty;
|
||||||
/* we've to flush the tlb before the pages can be freed */
|
/* we've to flush the tlb before the pages can be freed */
|
||||||
if (need_tlb_flush)
|
if (need_tlb_flush)
|
||||||
@ -950,12 +955,6 @@ int is_hwpoison_pfn(pfn_t pfn)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
|
EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
|
||||||
|
|
||||||
int is_fault_pfn(pfn_t pfn)
|
|
||||||
{
|
|
||||||
return pfn == fault_pfn;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL_GPL(is_fault_pfn);
|
|
||||||
|
|
||||||
int is_noslot_pfn(pfn_t pfn)
|
int is_noslot_pfn(pfn_t pfn)
|
||||||
{
|
{
|
||||||
return pfn == bad_pfn;
|
return pfn == bad_pfn;
|
||||||
@ -1039,11 +1038,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(gfn_to_hva);
|
EXPORT_SYMBOL_GPL(gfn_to_hva);
|
||||||
|
|
||||||
static pfn_t get_fault_pfn(void)
|
pfn_t get_fault_pfn(void)
|
||||||
{
|
{
|
||||||
get_page(fault_page);
|
get_page(fault_page);
|
||||||
return fault_pfn;
|
return fault_pfn;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(get_fault_pfn);
|
||||||
|
|
||||||
int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
|
int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
|
||||||
unsigned long start, int write, struct page **page)
|
unsigned long start, int write, struct page **page)
|
||||||
@ -1065,8 +1065,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
|
|||||||
return rc == -EHWPOISON;
|
return rc == -EHWPOISON;
|
||||||
}
|
}
|
||||||
|
|
||||||
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
|
static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
|
||||||
bool *async, bool write_fault, bool *writable)
|
bool write_fault, bool *writable)
|
||||||
{
|
{
|
||||||
struct page *page[1];
|
struct page *page[1];
|
||||||
int npages = 0;
|
int npages = 0;
|
||||||
@ -1146,9 +1146,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
|
|||||||
return pfn;
|
return pfn;
|
||||||
}
|
}
|
||||||
|
|
||||||
pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
|
pfn_t hva_to_pfn_atomic(unsigned long addr)
|
||||||
{
|
{
|
||||||
return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
|
return hva_to_pfn(addr, true, NULL, true, NULL);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
|
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
|
||||||
|
|
||||||
@ -1166,7 +1166,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
|
|||||||
return page_to_pfn(bad_page);
|
return page_to_pfn(bad_page);
|
||||||
}
|
}
|
||||||
|
|
||||||
return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
|
return hva_to_pfn(addr, atomic, async, write_fault, writable);
|
||||||
}
|
}
|
||||||
|
|
||||||
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
|
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
|
||||||
@ -1195,11 +1195,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
|
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
|
||||||
|
|
||||||
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
|
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
|
||||||
struct kvm_memory_slot *slot, gfn_t gfn)
|
|
||||||
{
|
{
|
||||||
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
|
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
|
||||||
return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
|
return hva_to_pfn(addr, false, NULL, true, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
|
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
|
||||||
@ -1580,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
|
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
|
||||||
|
|
||||||
|
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||||
|
/*
|
||||||
|
* Helper that checks whether a VCPU is eligible for directed yield.
|
||||||
|
* Most eligible candidate to yield is decided by following heuristics:
|
||||||
|
*
|
||||||
|
* (a) VCPU which has not done pl-exit or cpu relax intercepted recently
|
||||||
|
* (preempted lock holder), indicated by @in_spin_loop.
|
||||||
|
* Set at the beiginning and cleared at the end of interception/PLE handler.
|
||||||
|
*
|
||||||
|
* (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
|
||||||
|
* chance last time (mostly it has become eligible now since we have probably
|
||||||
|
* yielded to lockholder in last iteration. This is done by toggling
|
||||||
|
* @dy_eligible each time a VCPU checked for eligibility.)
|
||||||
|
*
|
||||||
|
* Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
|
||||||
|
* to preempted lock-holder could result in wrong VCPU selection and CPU
|
||||||
|
* burning. Giving priority for a potential lock-holder increases lock
|
||||||
|
* progress.
|
||||||
|
*
|
||||||
|
* Since algorithm is based on heuristics, accessing another VCPU data without
|
||||||
|
* locking does not harm. It may result in trying to yield to same VCPU, fail
|
||||||
|
* and continue with next VCPU and so on.
|
||||||
|
*/
|
||||||
|
bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
bool eligible;
|
||||||
|
|
||||||
|
eligible = !vcpu->spin_loop.in_spin_loop ||
|
||||||
|
(vcpu->spin_loop.in_spin_loop &&
|
||||||
|
vcpu->spin_loop.dy_eligible);
|
||||||
|
|
||||||
|
if (vcpu->spin_loop.in_spin_loop)
|
||||||
|
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
|
||||||
|
|
||||||
|
return eligible;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
|
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
|
||||||
{
|
{
|
||||||
struct kvm *kvm = me->kvm;
|
struct kvm *kvm = me->kvm;
|
||||||
@ -1589,6 +1625,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
|
|||||||
int pass;
|
int pass;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
kvm_vcpu_set_in_spin_loop(me, true);
|
||||||
/*
|
/*
|
||||||
* We boost the priority of a VCPU that is runnable but not
|
* We boost the priority of a VCPU that is runnable but not
|
||||||
* currently running, because it got preempted by something
|
* currently running, because it got preempted by something
|
||||||
@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
|
|||||||
continue;
|
continue;
|
||||||
if (waitqueue_active(&vcpu->wq))
|
if (waitqueue_active(&vcpu->wq))
|
||||||
continue;
|
continue;
|
||||||
|
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
|
||||||
|
continue;
|
||||||
if (kvm_vcpu_yield_to(vcpu)) {
|
if (kvm_vcpu_yield_to(vcpu)) {
|
||||||
kvm->last_boosted_vcpu = i;
|
kvm->last_boosted_vcpu = i;
|
||||||
yielded = 1;
|
yielded = 1;
|
||||||
@ -1614,6 +1653,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
kvm_vcpu_set_in_spin_loop(me, false);
|
||||||
|
|
||||||
|
/* Ensure vcpu is not eligible during next spinloop */
|
||||||
|
kvm_vcpu_set_dy_eligible(me, false);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
|
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
|
||||||
|
|
||||||
@ -2697,9 +2740,6 @@ static struct syscore_ops kvm_syscore_ops = {
|
|||||||
.resume = kvm_resume,
|
.resume = kvm_resume,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct page *bad_page;
|
|
||||||
pfn_t bad_pfn;
|
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
|
struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user