KVM: x86/mmu: always take tdp_mmu_pages_lock

It is cheap to take tdp_mmu_pages_lock in all write-side critical sections.
We already do it all the time when zapping with read_lock(), so it is not
a problem to do it from the kvm_tdp_mmu_zap_all() path (aka
kvm_arch_flush_shadow_all(), aka VM destruction and MMU notifier release).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20231125083400.1399197-4-pbonzini@redhat.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2024-11-10 14:11:52 +00:00 · 2023-11-25 03:33:59 -05:00 · 2023-11-25 03:33:59 -05:00 · 250ce1b4d2
commit 250ce1b4d2
parent 484dd27c06
3 changed files with 13 additions and 29 deletions
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -43,10 +43,9 @@ On x86:

 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock

-- kvm->arch.mmu_lock is an rwlock.  kvm->arch.tdp_mmu_pages_lock and
-  kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
-  cannot be taken without already holding kvm->arch.mmu_lock (typically with
-  ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).
+- kvm->arch.mmu_lock is an rwlock; critical sections for
+  kvm->arch.tdp_mmu_pages_lock and kvm->arch.mmu_unsync_pages_lock must
+  also take kvm->arch.mmu_lock

 Everything else is a leaf: no other lock is taken inside the critical
 sections.
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1407,9 +1407,8 @@ struct kvm_arch {
 	 *	the MMU lock in read mode + RCU or
 	 *	the MMU lock in write mode
 	 *
-	 * For writes, this list is protected by:
-	 *	the MMU lock in read mode + the tdp_mmu_pages_lock or
-	 *	the MMU lock in write mode
+	 * For writes, this list is protected by tdp_mmu_pages_lock; see
+	 * below for the details.
 	 *
 	 * Roots will remain in the list until their tdp_mmu_root_count
 	 * drops to zero, at which point the thread that decremented the
@@ -1426,8 +1425,10 @@ struct kvm_arch {
 	 *  - possible_nx_huge_pages;
 	 *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
 	 *    by the TDP MMU
-	 * It is acceptable, but not necessary, to acquire this lock when
-	 * the thread holds the MMU lock in write mode.
+	 * Because the lock is only taken within the MMU lock, strictly
+	 * speaking it is redundant to acquire this lock when the thread
+	 * holds the MMU lock in write mode.  However it often simplifies
+	 * the code to do so.
 	 */
 	spinlock_t tdp_mmu_pages_lock;
 #endif /* CONFIG_X86_64 */
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -75,12 +75,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)

 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
-	/*
-	 * Either read or write is okay, but mmu_lock must be held because
-	 * writers are not required to take tdp_mmu_pages_lock.
-	 */
-	lockdep_assert_held(&kvm->mmu_lock);
-
 	if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
 		return;

@@ -281,27 +275,17 @@ static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 *
 * @kvm: kvm instance
 * @sp: the page to be removed
- * @shared: This operation may not be running under the exclusive use of
- *	    the MMU lock and the operation must synchronize with other
- *	    threads that might be adding or removing pages.
 */
-static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
-			      bool shared)
+static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	tdp_unaccount_mmu_page(kvm, sp);

 	if (!sp->nx_huge_page_disallowed)
 		return;

-	if (shared)
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
-	else
-		lockdep_assert_held_write(&kvm->mmu_lock);
-
 	sp->nx_huge_page_disallowed = false;
 	untrack_possible_nx_huge_page(kvm, sp);
-
-	if (shared)
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 }

@@ -331,7 +315,7 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)

 	trace_kvm_mmu_prepare_zap_page(sp);

-	tdp_mmu_unlink_sp(kvm, sp, shared);
+	tdp_mmu_unlink_sp(kvm, sp);

 	for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
 		tdp_ptep_t sptep = pt + i;