Merge branch kvm-arm64/tlbi-range into kvmarm-master/next

* kvm-arm64/tlbi-range:
  : .
  : FEAT_TLBIRANGE support, courtesy of Raghavendra Rao Ananta.
  : From the cover letter:
  :
  : "In certain code paths, KVM/ARM currently invalidates the entire VM's
  : page-tables instead of just invalidating a necessary range. For example,
  : when collapsing a table PTE to a block PTE, instead of iterating over
  : each PTE and flushing them, KVM uses 'vmalls12e1is' TLBI operation to
  : flush all the entries. This is inefficient since the guest would have
  : to refill the TLBs again, even for the addresses that aren't covered
  : by the table entry. The performance impact would scale poorly if many
  : addresses in the VM is going through this remapping.
  :
  : For architectures that implement FEAT_TLBIRANGE, KVM can replace such
  : inefficient paths by performing the invalidations only on the range of
  : addresses that are in scope. This series tries to achieve the same in
  : the areas of stage-2 map, unmap and write-protecting the pages."
  : .
  KVM: arm64: Use TLBI range-based instructions for unmap
  KVM: arm64: Invalidate the table entries upon a range
  KVM: arm64: Flush only the memslot after write-protect
  KVM: arm64: Implement kvm_arch_flush_remote_tlbs_range()
  KVM: arm64: Define kvm_tlb_flush_vmid_range()
  KVM: arm64: Implement __kvm_tlb_flush_vmid_range()
  arm64: tlb: Implement __flush_s2_tlb_range_op()
  arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range
  KVM: Move kvm_arch_flush_remote_tlbs_memslot() to common code
  KVM: Allow range-based TLB invalidation from common code
  KVM: Remove CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
  KVM: arm64: Use kvm_arch_flush_remote_tlbs()
  KVM: Declare kvm_arch_flush_remote_tlbs() globally
  KVM: Rename kvm_arch_flush_remote_tlb() to kvm_arch_flush_remote_tlbs()

Signed-off-by: Marc Zyngier <maz@kernel.org>
This commit is contained in:
Marc Zyngier 2023-08-28 09:29:02 +01:00
commit d58335d10f
21 changed files with 285 additions and 131 deletions

View File

@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
phys_addr_t ipa,
int level);
extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t start, unsigned long pages);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);

View File

@ -1120,6 +1120,10 @@ int __init kvm_set_ipa_limit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
static inline bool kvm_vm_is_protected(struct kvm *kvm)
{
return false;

View File

@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
* kvm_pgtable_prot format.
*/
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
/**
* kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
*
* @mmu: Stage-2 KVM MMU struct
* @addr: The base Intermediate physical address from which to invalidate
* @size: Size of the range from the base to invalidate
*/
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t addr, size_t size);
#endif /* __ARM64_KVM_PGTABLE_H__ */

View File

@ -278,14 +278,77 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
*/
#define MAX_TLBI_OPS PTRS_PER_PTE
/*
* __flush_tlb_range_op - Perform TLBI operation upon a range
*
* @op: TLBI instruction that operates on a range (has 'r' prefix)
* @start: The start address of the range
* @pages: Range as the number of pages from 'start'
* @stride: Flush granularity
* @asid: The ASID of the task (0 for IPA instructions)
* @tlb_level: Translation Table level hint, if known
* @tlbi_user: If 'true', call an additional __tlbi_user()
* (typically for user ASIDs). 'flase' for IPA instructions
*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the TLB
* range ops are supported, then:
*
* 1. If 'pages' is odd, flush the first page through non-range
* operations;
*
* 2. For remaining pages: the minimum range granularity is decided
* by 'scale', so multiple range TLBI operations may be required.
* Start from scale = 0, flush the corresponding number of pages
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
* until no pages left.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user) \
do { \
int num = 0; \
int scale = 0; \
unsigned long addr; \
\
while (pages > 0) { \
if (!system_supports_tlb_range() || \
pages % 2 == 1) { \
addr = __TLBI_VADDR(start, asid); \
__tlbi_level(op, addr, tlb_level); \
if (tlbi_user) \
__tlbi_user_level(op, addr, tlb_level); \
start += stride; \
pages -= stride >> PAGE_SHIFT; \
continue; \
} \
\
num = __TLBI_RANGE_NUM(pages, scale); \
if (num >= 0) { \
addr = __TLBI_VADDR_RANGE(start, asid, scale, \
num, tlb_level); \
__tlbi(r##op, addr); \
if (tlbi_user) \
__tlbi_user(r##op, addr); \
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
pages -= __TLBI_RANGE_PAGES(num, scale); \
} \
scale++; \
} \
} while (0)
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level,
int tlb_level)
{
int num = 0;
int scale = 0;
unsigned long asid, addr, pages;
unsigned long asid, pages;
start = round_down(start, stride);
end = round_up(end, stride);
@ -307,56 +370,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
asid = ASID(vma->vm_mm);
/*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the TLB
* range ops are supported, then:
*
* 1. If 'pages' is odd, flush the first page through non-range
* operations;
*
* 2. For remaining pages: the minimum range granularity is decided
* by 'scale', so multiple range TLBI operations may be required.
* Start from scale = 0, flush the corresponding number of pages
* ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
* until no pages left.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
while (pages > 0) {
if (!system_supports_tlb_range() ||
pages % 2 == 1) {
addr = __TLBI_VADDR(start, asid);
if (last_level) {
__tlbi_level(vale1is, addr, tlb_level);
__tlbi_user_level(vale1is, addr, tlb_level);
} else {
__tlbi_level(vae1is, addr, tlb_level);
__tlbi_user_level(vae1is, addr, tlb_level);
}
start += stride;
pages -= stride >> PAGE_SHIFT;
continue;
}
if (last_level)
__flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
else
__flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
num = __TLBI_RANGE_NUM(pages, scale);
if (num >= 0) {
addr = __TLBI_VADDR_RANGE(start, asid, scale,
num, tlb_level);
if (last_level) {
__tlbi(rvale1is, addr);
__tlbi_user(rvale1is, addr);
} else {
__tlbi(rvae1is, addr);
__tlbi_user(rvae1is, addr);
}
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
pages -= __TLBI_RANGE_PAGES(num, scale);
}
scale++;
}
dsb(ish);
}

View File

@ -25,7 +25,6 @@ menuconfig KVM
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_XFER_TO_GUEST_WORK

View File

@ -1534,12 +1534,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
}
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
kvm_flush_remote_tlbs(kvm);
}
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
{

View File

@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
}
static void
handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
DECLARE_REG(unsigned long, pages, host_ctxt, 3);
__kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
}
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_read_vmcr),

View File

@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t start, unsigned long pages)
{
struct tlb_inv_context cxt;
unsigned long stride;
/*
* Since the range of addresses may not be mapped at
* the same level, assume the worst case as PAGE_SIZE
*/
stride = PAGE_SIZE;
start = round_down(start, stride);
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt, false);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
isb();
/* See the comment in __kvm_tlb_flush_vmid_ipa() */
if (icache_is_vpipt())
icache_inval_all_pou();
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;

View File

@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
}
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t addr, size_t size)
{
unsigned long pages, inval_pages;
if (!system_supports_tlb_range()) {
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
return;
}
pages = size >> PAGE_SHIFT;
while (pages > 0) {
inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
addr += inval_pages << PAGE_SHIFT;
pages -= inval_pages;
}
}
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* evicted pte value (if any).
*/
if (kvm_pte_table(ctx->old, ctx->level))
kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
kvm_tlb_flush_vmid_range(mmu, ctx->addr,
kvm_granule_size(ctx->level));
else if (kvm_pte_valid(ctx->old))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
smp_store_release(ctx->ptep, new);
}
static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops)
static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
{
/*
* Clear the existing PTE, and perform break-before-make with
* TLB maintenance if it was valid.
* If FEAT_TLBIRANGE is implemented, defer the individual
* TLB invalidations until the entire walk is finished, and
* then use the range-based TLBI instructions to do the
* invalidations. Condition deferred TLB invalidation on the
* system supporting FWB as the optimization is entirely
* pointless when the unmap walker needs to perform CMOs.
*/
return system_supports_tlb_range() && stage2_has_fwb(pgt);
}
static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops)
{
struct kvm_pgtable *pgt = ctx->arg;
/*
* Clear the existing PTE, and perform break-before-make if it was
* valid. Depending on the system support, defer the TLB maintenance
* for the same until the entire unmap walk is completed.
*/
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
if (!stage2_unmap_defer_tlb_flush(pgt))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
}
mm_ops->put_page(ctx->ptep);
@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
* block entry and rely on the remaining portions being faulted
* back lazily.
*/
stage2_put_pte(ctx, mmu, mm_ops);
stage2_unmap_put_pte(ctx, mmu, mm_ops);
if (need_flush && mm_ops->dcache_clean_inval_poc)
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
int ret;
struct kvm_pgtable_walker walker = {
.cb = stage2_unmap_walker,
.arg = pgt,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
};
return kvm_pgtable_walk(pgt, addr, size, &walker);
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
if (stage2_unmap_defer_tlb_flush(pgt))
/* Perform the deferred TLB invalidations */
kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
return ret;
}
struct stage2_attr_data {

View File

@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t start, unsigned long pages)
{
struct tlb_inv_context cxt;
unsigned long stride;
/*
* Since the range of addresses may not be mapped at
* the same level, assume the worst case as PAGE_SIZE
*/
stride = PAGE_SIZE;
start = round_down(start, stride);
dsb(ishst);
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
isb();
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;

View File

@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
}
/**
* kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
* kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
* @kvm: pointer to kvm structure.
*
* Interface to HYP function to flush all VM TLB entries
*/
void kvm_flush_remote_tlbs(struct kvm *kvm)
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
++kvm->stat.generic.remote_tlb_flush_requests;
kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
return 0;
}
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
gfn_t gfn, u64 nr_pages)
{
kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
return 0;
}
static bool kvm_is_device_pfn(unsigned long pfn)
@ -1075,7 +1083,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
write_lock(&kvm->mmu_lock);
stage2_wp_range(&kvm->arch.mmu, start, end);
write_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
kvm_flush_remote_tlbs_memslot(kvm, memslot);
}
/**

View File

@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
int kvm_arch_flush_remote_tlb(struct kvm *kvm);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
#endif /* __MIPS_KVM_HOST_H__ */

View File

@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
/* Flush slot from GPA */
kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
slot->base_gfn + slot->npages - 1);
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
kvm_flush_remote_tlbs_memslot(kvm, slot);
spin_unlock(&kvm->mmu_lock);
}
@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
new->base_gfn + new->npages - 1);
if (needs_flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, new);
kvm_flush_remote_tlbs_memslot(kvm, new);
spin_unlock(&kvm->mmu_lock);
}
}
@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
}
int kvm_arch_flush_remote_tlb(struct kvm *kvm)
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
kvm_mips_callbacks->prepare_flush_shadow(kvm);
return 1;
}
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
kvm_flush_remote_tlbs(kvm);
}
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
int r;

View File

@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
kvm_flush_remote_tlbs(kvm);
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
{
}

View File

@ -1794,8 +1794,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
#define __KVM_HAVE_ARCH_VM_FREE
void kvm_arch_free_vm(struct kvm *kvm);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
if (kvm_x86_ops.flush_remote_tlbs &&
!static_call(kvm_x86_flush_remote_tlbs)(kvm))
@ -1804,6 +1804,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
return -ENOTSUPP;
}
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
#define kvm_arch_pmi_in_guest(vcpu) \
((vcpu) && (vcpu)->arch.handling_intr_from_guest)

View File

@ -278,16 +278,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void)
return kvm_x86_ops.flush_remote_tlbs_range;
}
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
gfn_t nr_pages)
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
int ret = -EOPNOTSUPP;
if (!kvm_x86_ops.flush_remote_tlbs_range)
return -EOPNOTSUPP;
if (kvm_x86_ops.flush_remote_tlbs_range)
ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
nr_pages);
if (ret)
kvm_flush_remote_tlbs(kvm);
return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
}
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@ -6670,7 +6666,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
*/
if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
kvm_flush_remote_tlbs_memslot(kvm, slot);
}
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
@ -6689,20 +6685,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
}
}
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
/*
* All current use cases for flushing the TLBs for a specific memslot
* related to dirty logging, and many do the TLB flush out of mmu_lock.
* The interaction between the various operations on memslot must be
* serialized by slots_locks to ensure the TLB flush from one operation
* is observed by any other operation on the same memslot.
*/
lockdep_assert_held(&kvm->slots_lock);
kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
}
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{

View File

@ -170,9 +170,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn,
int min_level);
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
gfn_t nr_pages);
/* Flush the given page (huge or not) of guest memory. */
static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
{

View File

@ -12751,7 +12751,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* See is_writable_pte() for more details (the case involving
* access-tracked SPTEs is particularly relevant).
*/
kvm_arch_flush_remote_tlbs_memslot(kvm, new);
kvm_flush_remote_tlbs_memslot(kvm, new);
}
}

View File

@ -1359,6 +1359,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
@ -1387,10 +1390,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
unsigned long mask);
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
int *is_dirty, struct kvm_memory_slot **memslot);
@ -1479,11 +1479,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
}
#endif
#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
return -ENOTSUPP;
}
#else
int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
#endif
#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
gfn_t gfn, u64 nr_pages)
{
return -EOPNOTSUPP;
}
#else
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
#endif
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA

View File

@ -62,9 +62,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
config KVM_VFIO
bool
config HAVE_KVM_ARCH_TLB_FLUSH_ALL
bool
config HAVE_KVM_INVALID_WAKEUPS
bool

View File

@ -345,7 +345,6 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
}
EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
++kvm->stat.generic.remote_tlb_flush_requests;
@ -361,12 +360,38 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
* kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
* barrier here.
*/
if (!kvm_arch_flush_remote_tlb(kvm)
if (!kvm_arch_flush_remote_tlbs(kvm)
|| kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.generic.remote_tlb_flush;
}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
#endif
void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
return;
/*
* Fall back to a flushing entire TLBs if the architecture range-based
* TLB invalidation is unsupported or can't be performed for whatever
* reason.
*/
kvm_flush_remote_tlbs(kvm);
}
void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
/*
* All current use cases for flushing the TLBs for a specific memslot
* are related to dirty logging, and many do the TLB flush out of
* mmu_lock. The interaction between the various operations on memslot
* must be serialized by slots_locks to ensure the TLB flush from one
* operation is observed by any other operation on the same memslot.
*/
lockdep_assert_held(&kvm->slots_lock);
kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
}
static void kvm_flush_shadow_all(struct kvm *kvm)
{
@ -2180,7 +2205,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
}
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
kvm_flush_remote_tlbs_memslot(kvm, memslot);
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
return -EFAULT;
@ -2297,7 +2322,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
KVM_MMU_UNLOCK(kvm);
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
kvm_flush_remote_tlbs_memslot(kvm, memslot);
return 0;
}