diff --git a/Documentation/mm/arch_pgtable_helpers.rst b/Documentation/mm/arch_pgtable_helpers.rst index 2466d3363af7..ad50ca6f495e 100644 --- a/Documentation/mm/arch_pgtable_helpers.rst +++ b/Documentation/mm/arch_pgtable_helpers.rst @@ -140,7 +140,8 @@ PMD Page Table Helpers +---------------------------+--------------------------------------------------+ | pmd_swp_clear_soft_dirty | Clears a soft dirty swapped PMD | +---------------------------+--------------------------------------------------+ -| pmd_mkinvalid | Invalidates a mapped PMD [1] | +| pmd_mkinvalid | Invalidates a present PMD; do not call for | +| | non-present PMD [1] | +---------------------------+--------------------------------------------------+ | pmd_set_huge | Creates a PMD huge mapping | +---------------------------+--------------------------------------------------+ @@ -196,7 +197,8 @@ PUD Page Table Helpers +---------------------------+--------------------------------------------------+ | pud_mkdevmap | Creates a ZONE_DEVICE mapped PUD | +---------------------------+--------------------------------------------------+ -| pud_mkinvalid | Invalidates a mapped PUD [1] | +| pud_mkinvalid | Invalidates a present PUD; do not call for | +| | non-present PUD [1] | +---------------------------+--------------------------------------------------+ | pud_set_huge | Creates a PUD huge mapping | +---------------------------+--------------------------------------------------+ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 83823db3488b..2975ea0841ba 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -170,6 +170,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, { unsigned long old_pmd; + VM_WARN_ON_ONCE(!pmd_present(*pmdp)); old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return __pmd(old_pmd); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2cb2a2e7b34b..558902edbfec 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1769,8 +1769,10 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); + pmd_t pmd; + VM_WARN_ON_ONCE(!pmd_present(*pmdp)); + pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd); } diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 19642f7ffb52..8648a50afe88 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -249,6 +249,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, { pmd_t old, entry; + VM_WARN_ON_ONCE(!pmd_present(*pmdp)); entry = __pmd(pmd_val(*pmdp) & ~_PAGE_VALID); old = pmdp_establish(vma, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 94767c82fc0d..93e54ba91fbf 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -631,6 +631,8 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { + VM_WARN_ON_ONCE(!pmd_present(*pmdp)); + /* * No flush is necessary. Once an invalid PTE is established, the PTE's * access and dirty bits cannot be updated. diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 08e4f3343bcd..ccdcff73284a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2430,32 +2430,11 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return __split_huge_zero_page_pmd(vma, haddr, pmd); } - /* - * Up to this point the pmd is present and huge and userland has the - * whole access to the hugepage during the split (which happens in - * place). If we overwrite the pmd with the not-huge version pointing - * to the pte here (which of course we could if all CPUs were bug - * free), userland could trigger a small page size TLB miss on the - * small sized TLB while the hugepage TLB entry is still established in - * the huge TLB. Some CPU doesn't like that. - * See http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum - * 383 on page 105. Intel should be safe but is also warns that it's - * only safe if the permission and cache attributes of the two entries - * loaded in the two TLB is identical (which should be the case here). - * But it is generally safer to never allow small and huge TLB entries - * for the same virtual address to be loaded simultaneously. So instead - * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the - * current pmd notpresent (atomically because here the pmd_trans_huge - * must remain set at all times on the pmd until the split is complete - * for this pmd), then we flush the SMP TLB and finally we write the - * non-huge version of the pmd entry with pmd_populate. - */ - old_pmd = pmdp_invalidate(vma, haddr, pmd); - - pmd_migration = is_pmd_migration_entry(old_pmd); + pmd_migration = is_pmd_migration_entry(*pmd); if (unlikely(pmd_migration)) { swp_entry_t entry; + old_pmd = *pmd; entry = pmd_to_swp_entry(old_pmd); page = pfn_swap_entry_to_page(entry); write = is_writable_migration_entry(entry); @@ -2466,6 +2445,30 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, soft_dirty = pmd_swp_soft_dirty(old_pmd); uffd_wp = pmd_swp_uffd_wp(old_pmd); } else { + /* + * Up to this point the pmd is present and huge and userland has + * the whole access to the hugepage during the split (which + * happens in place). If we overwrite the pmd with the not-huge + * version pointing to the pte here (which of course we could if + * all CPUs were bug free), userland could trigger a small page + * size TLB miss on the small sized TLB while the hugepage TLB + * entry is still established in the huge TLB. Some CPU doesn't + * like that. See + * http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum + * 383 on page 105. Intel should be safe but is also warns that + * it's only safe if the permission and cache attributes of the + * two entries loaded in the two TLB is identical (which should + * be the case here). But it is generally safer to never allow + * small and huge TLB entries for the same virtual address to be + * loaded simultaneously. So instead of doing "pmd_populate(); + * flush_pmd_tlb_range();" we first mark the current pmd + * notpresent (atomically because here the pmd_trans_huge must + * remain set at all times on the pmd until the split is + * complete for this pmd), then we flush the SMP TLB and finally + * we write the non-huge version of the pmd entry with + * pmd_populate. + */ + old_pmd = pmdp_invalidate(vma, haddr, pmd); page = pmd_page(old_pmd); folio = page_folio(page); if (pmd_dirty(old_pmd)) { diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 4fcd959dcc4d..a78a4adf711a 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -198,6 +198,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { + VM_WARN_ON_ONCE(!pmd_present(*pmdp)); pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return old; @@ -208,6 +209,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { + VM_WARN_ON_ONCE(!pmd_present(*pmdp)); return pmdp_invalidate(vma, address, pmdp); } #endif