KVM: Rename/refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page()

Rename and refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page()
to better reflect what KVM is actually checking, and to eliminate extra
pfn_to_page() lookups.  The kvm_release_pfn_*() and kvm_try_get_pfn()
helpers in particular benefit from "refcounted" nomenclature, as it's not
all that obvious why KVM needs to get/put refcounts for some PG_reserved
pages (ZERO_PAGE and ZONE_DEVICE).

Add a comment to call out that the list of exceptions to PG_reserved is
all but guaranteed to be incomplete.  The list has mostly been compiled
by people throwing noodles at KVM and finding out they stick a little too
well, e.g. the ZERO_PAGE's refcount overflowed and ZONE_DEVICE pages
didn't get freed.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220429010416.2788472-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Sean Christopherson 2022-04-29 01:04:15 +00:00 committed by Paolo Bonzini
parent 284dc49307
commit b14b2690c5
4 changed files with 63 additions and 22 deletions

View File

@ -534,6 +534,7 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
kvm_pfn_t pfn;
u64 old_spte = *sptep;
int level = sptep_to_sp(sptep)->role.level;
struct page *page;
if (!is_shadow_present_pte(old_spte) ||
!spte_has_volatile_bits(old_spte))
@ -549,11 +550,13 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
pfn = spte_to_pfn(old_spte);
/*
* KVM does not hold the refcount of the page used by
* kvm mmu, before reclaiming the page, we should
* unmap it from mmu first.
* KVM doesn't hold a reference to any pages mapped into the guest, and
* instead uses the mmu_notifier to ensure that KVM unmaps any pages
* before they are reclaimed. Sanity check that, if the pfn is backed
* by a refcounted page, the refcount is elevated.
*/
WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
page = kvm_pfn_to_refcounted_page(pfn);
WARN_ON(page && !page_count(page));
if (is_accessed_spte(old_spte))
kvm_set_pfn_accessed(pfn);
@ -2881,7 +2884,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
if (unlikely(fault->max_level == PG_LEVEL_4K))
return;
if (is_error_noslot_pfn(fault->pfn) || kvm_is_reserved_pfn(fault->pfn))
if (is_error_noslot_pfn(fault->pfn) || !kvm_pfn_to_refcounted_page(fault->pfn))
return;
if (kvm_slot_dirty_track_enabled(slot))
@ -5993,7 +5996,7 @@ restart:
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
if (sp->role.direct && kvm_pfn_to_refcounted_page(pfn) &&
sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
pfn, PG_LEVEL_NUM)) {
pte_list_remove(kvm, rmap_head, sptep);

View File

@ -1751,7 +1751,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
*/
pfn = spte_to_pfn(iter.old_spte);
if (kvm_is_reserved_pfn(pfn))
if (!kvm_pfn_to_refcounted_page(pfn))
continue;
max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,

View File

@ -1570,7 +1570,7 @@ void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
bool kvm_is_zone_device_page(struct page *page);
struct kvm_irq_ack_notifier {

View File

@ -182,19 +182,36 @@ bool kvm_is_zone_device_page(struct page *page)
return is_zone_device_page(page);
}
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
/*
* Returns a 'struct page' if the pfn is "valid" and backed by a refcounted
* page, NULL otherwise. Note, the list of refcounted PG_reserved page types
* is likely incomplete, it has been compiled purely through people wanting to
* back guest with a certain type of memory and encountering issues.
*/
struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn)
{
struct page *page;
if (!pfn_valid(pfn))
return NULL;
page = pfn_to_page(pfn);
if (!PageReserved(page))
return page;
/* The ZERO_PAGE(s) is marked PG_reserved, but is refcounted. */
if (is_zero_pfn(pfn))
return page;
/*
* ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
* perspective they are "normal" pages, albeit with slightly different
* usage rules.
*/
if (pfn_valid(pfn))
return PageReserved(pfn_to_page(pfn)) &&
!is_zero_pfn(pfn) &&
!kvm_is_zone_device_page(pfn_to_page(pfn));
if (kvm_is_zone_device_page(page))
return page;
return true;
return NULL;
}
/*
@ -2501,9 +2518,12 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
static int kvm_try_get_pfn(kvm_pfn_t pfn)
{
if (kvm_is_reserved_pfn(pfn))
struct page *page = kvm_pfn_to_refcounted_page(pfn);
if (!page)
return 1;
return get_page_unless_zero(pfn_to_page(pfn));
return get_page_unless_zero(page);
}
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
@ -2728,6 +2748,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
*/
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
struct page *page;
kvm_pfn_t pfn;
pfn = gfn_to_pfn(kvm, gfn);
@ -2735,10 +2756,11 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
if (is_error_noslot_pfn(pfn))
return KVM_ERR_PTR_BAD_PAGE;
if (kvm_is_reserved_pfn(pfn))
page = kvm_pfn_to_refcounted_page(pfn);
if (!page)
return KVM_ERR_PTR_BAD_PAGE;
return pfn_to_page(pfn);
return page;
}
EXPORT_SYMBOL_GPL(gfn_to_page);
@ -2841,8 +2863,16 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
void kvm_release_pfn_clean(kvm_pfn_t pfn)
{
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
kvm_release_page_clean(pfn_to_page(pfn));
struct page *page;
if (is_error_noslot_pfn(pfn))
return;
page = kvm_pfn_to_refcounted_page(pfn);
if (!page)
return;
kvm_release_page_clean(page);
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
@ -2857,8 +2887,16 @@ EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
void kvm_release_pfn_dirty(kvm_pfn_t pfn)
{
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
kvm_release_page_dirty(pfn_to_page(pfn));
struct page *page;
if (is_error_noslot_pfn(pfn))
return;
page = kvm_pfn_to_refcounted_page(pfn);
if (!page)
return;
kvm_release_page_dirty(page);
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);