mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
26 hotfixes. 8 are for issues which were introduced during this -rc
cycle, 18 are for earlier issues, and are cc:stable. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCYzH+NgAKCRDdBJ7gKXxA ju4AAQDrFWErVp+ra5P66SSbiFmm8NAW1awt4nHwAPcihNf3yQD/eQcB3w2q0Dm1 9HjsyEVkTYIeaJSAbCraDnMwUdWTIgY= =p5+0 -----END PGP SIGNATURE----- Merge tag 'mm-hotfixes-stable-2022-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull last (?) hotfixes from Andrew Morton: "26 hotfixes. 8 are for issues which were introduced during this -rc cycle, 18 are for earlier issues, and are cc:stable" * tag 'mm-hotfixes-stable-2022-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (26 commits) x86/uaccess: avoid check_object_size() in copy_from_user_nmi() mm/page_isolation: fix isolate_single_pageblock() isolation behavior mm,hwpoison: check mm when killing accessing process mm/hugetlb: correct demote page offset logic mm: prevent page_frag_alloc() from corrupting the memory mm: bring back update_mmu_cache() to finish_fault() frontswap: don't call ->init if no ops are registered mm/huge_memory: use pfn_to_online_page() in split_huge_pages_all() mm: fix madivse_pageout mishandling on non-LRU page powerpc/64s/radix: don't need to broadcast IPI for radix pmd collapse flush mm: gup: fix the fast GUP race against THP collapse mm: fix dereferencing possible ERR_PTR vmscan: check folio_test_private(), not folio_get_private() mm: fix VM_BUG_ON in __delete_from_swap_cache() tools: fix compilation after gfp_types.h split mm/damon/dbgfs: fix memory leak when using debugfs_lookup() mm/migrate_device.c: copy pte dirty bit to page mm/migrate_device.c: add missing flush_cache_page() mm/migrate_device.c: flush TLB while holding PTL x86/mm: disable instrumentations of mm/pgprot.c ...
This commit is contained in:
commit
3800a713b6
@ -937,15 +937,6 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
|
||||
pmd = *pmdp;
|
||||
pmd_clear(pmdp);
|
||||
|
||||
/*
|
||||
* pmdp collapse_flush need to ensure that there are no parallel gup
|
||||
* walk after this call. This is needed so that we can have stable
|
||||
* page ref count when collapsing a page. We don't allow a collapse page
|
||||
* if we have gup taken on the page. We can ensure that by sending IPI
|
||||
* because gup walk happens with IRQ disabled.
|
||||
*/
|
||||
serialize_against_pte_lookup(vma->vm_mm);
|
||||
|
||||
radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
|
||||
|
||||
return pmd;
|
||||
|
@ -44,7 +44,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
|
||||
* called from other contexts.
|
||||
*/
|
||||
pagefault_disable();
|
||||
ret = __copy_from_user_inatomic(to, from, n);
|
||||
ret = raw_copy_from_user(to, from, n);
|
||||
pagefault_enable();
|
||||
|
||||
return ret;
|
||||
|
@ -4,10 +4,12 @@ KCOV_INSTRUMENT_tlb.o := n
|
||||
KCOV_INSTRUMENT_mem_encrypt.o := n
|
||||
KCOV_INSTRUMENT_mem_encrypt_amd.o := n
|
||||
KCOV_INSTRUMENT_mem_encrypt_identity.o := n
|
||||
KCOV_INSTRUMENT_pgprot.o := n
|
||||
|
||||
KASAN_SANITIZE_mem_encrypt.o := n
|
||||
KASAN_SANITIZE_mem_encrypt_amd.o := n
|
||||
KASAN_SANITIZE_mem_encrypt_identity.o := n
|
||||
KASAN_SANITIZE_pgprot.o := n
|
||||
|
||||
# Disable KCSAN entirely, because otherwise we get warnings that some functions
|
||||
# reference __initdata sections.
|
||||
@ -17,6 +19,7 @@ ifdef CONFIG_FUNCTION_TRACER
|
||||
CFLAGS_REMOVE_mem_encrypt.o = -pg
|
||||
CFLAGS_REMOVE_mem_encrypt_amd.o = -pg
|
||||
CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
|
||||
CFLAGS_REMOVE_pgprot.o = -pg
|
||||
endif
|
||||
|
||||
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
|
||||
|
@ -2092,7 +2092,8 @@ get_ctx_vol_failed:
|
||||
// TODO: Initialize security.
|
||||
/* Get the extended system files' directory inode. */
|
||||
vol->extend_ino = ntfs_iget(sb, FILE_Extend);
|
||||
if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) {
|
||||
if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino) ||
|
||||
!S_ISDIR(vol->extend_ino->i_mode)) {
|
||||
if (!IS_ERR(vol->extend_ino))
|
||||
iput(vol->extend_ino);
|
||||
ntfs_error(sb, "Failed to load $Extend.");
|
||||
|
@ -175,13 +175,13 @@ xfs_dax_notify_failure(
|
||||
u64 ddev_start;
|
||||
u64 ddev_end;
|
||||
|
||||
if (!(mp->m_sb.sb_flags & SB_BORN)) {
|
||||
if (!(mp->m_super->s_flags & SB_BORN)) {
|
||||
xfs_warn(mp, "filesystem is not ready for notify_failure()!");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
|
||||
xfs_warn(mp,
|
||||
xfs_debug(mp,
|
||||
"notify_failure() not supported on realtime device!");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
@ -194,7 +194,7 @@ xfs_dax_notify_failure(
|
||||
}
|
||||
|
||||
if (!xfs_has_rmapbt(mp)) {
|
||||
xfs_warn(mp, "notify_failure() needs rmapbt enabled!");
|
||||
xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
|
@ -139,6 +139,11 @@ struct dev_pagemap {
|
||||
};
|
||||
};
|
||||
|
||||
static inline bool pgmap_has_memory_failure(struct dev_pagemap *pgmap)
|
||||
{
|
||||
return pgmap->ops && pgmap->ops->memory_failure;
|
||||
}
|
||||
|
||||
static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
|
||||
{
|
||||
if (pgmap->flags & PGMAP_ALTMAP_VALID)
|
||||
|
@ -884,6 +884,7 @@ static int dbgfs_rm_context(char *name)
|
||||
struct dentry *root, *dir, **new_dirs;
|
||||
struct damon_ctx **new_ctxs;
|
||||
int i, j;
|
||||
int ret = 0;
|
||||
|
||||
if (damon_nr_running_ctxs())
|
||||
return -EBUSY;
|
||||
@ -898,14 +899,16 @@ static int dbgfs_rm_context(char *name)
|
||||
|
||||
new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs),
|
||||
GFP_KERNEL);
|
||||
if (!new_dirs)
|
||||
return -ENOMEM;
|
||||
if (!new_dirs) {
|
||||
ret = -ENOMEM;
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
new_ctxs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_ctxs),
|
||||
GFP_KERNEL);
|
||||
if (!new_ctxs) {
|
||||
kfree(new_dirs);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out_new_dirs;
|
||||
}
|
||||
|
||||
for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) {
|
||||
@ -925,7 +928,13 @@ static int dbgfs_rm_context(char *name)
|
||||
dbgfs_ctxs = new_ctxs;
|
||||
dbgfs_nr_ctxs--;
|
||||
|
||||
return 0;
|
||||
goto out_dput;
|
||||
|
||||
out_new_dirs:
|
||||
kfree(new_dirs);
|
||||
out_dput:
|
||||
dput(dir);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t dbgfs_rm_context_write(struct file *file,
|
||||
|
@ -125,6 +125,9 @@ void frontswap_init(unsigned type, unsigned long *map)
|
||||
* p->frontswap set to something valid to work properly.
|
||||
*/
|
||||
frontswap_map_set(sis, map);
|
||||
|
||||
if (!frontswap_enabled())
|
||||
return;
|
||||
frontswap_ops->init(type);
|
||||
}
|
||||
|
||||
|
34
mm/gup.c
34
mm/gup.c
@ -2345,8 +2345,28 @@ static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
|
||||
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
|
||||
unsigned int flags, struct page **pages, int *nr)
|
||||
/*
|
||||
* Fast-gup relies on pte change detection to avoid concurrent pgtable
|
||||
* operations.
|
||||
*
|
||||
* To pin the page, fast-gup needs to do below in order:
|
||||
* (1) pin the page (by prefetching pte), then (2) check pte not changed.
|
||||
*
|
||||
* For the rest of pgtable operations where pgtable updates can be racy
|
||||
* with fast-gup, we need to do (1) clear pte, then (2) check whether page
|
||||
* is pinned.
|
||||
*
|
||||
* Above will work for all pte-level operations, including THP split.
|
||||
*
|
||||
* For THP collapse, it's a bit more complicated because fast-gup may be
|
||||
* walking a pgtable page that is being freed (pte is still valid but pmd
|
||||
* can be cleared already). To avoid race in such condition, we need to
|
||||
* also check pmd here to make sure pmd doesn't change (corresponds to
|
||||
* pmdp_collapse_flush() in the THP collapse code path).
|
||||
*/
|
||||
static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
|
||||
unsigned long end, unsigned int flags,
|
||||
struct page **pages, int *nr)
|
||||
{
|
||||
struct dev_pagemap *pgmap = NULL;
|
||||
int nr_start = *nr, ret = 0;
|
||||
@ -2392,7 +2412,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
|
||||
goto pte_unmap;
|
||||
}
|
||||
|
||||
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
|
||||
if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
|
||||
unlikely(pte_val(pte) != pte_val(*ptep))) {
|
||||
gup_put_folio(folio, 1, flags);
|
||||
goto pte_unmap;
|
||||
}
|
||||
@ -2439,8 +2460,9 @@ pte_unmap:
|
||||
* get_user_pages_fast_only implementation that can pin pages. Thus it's still
|
||||
* useful to have gup_huge_pmd even if we can't operate on ptes.
|
||||
*/
|
||||
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
|
||||
unsigned int flags, struct page **pages, int *nr)
|
||||
static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
|
||||
unsigned long end, unsigned int flags,
|
||||
struct page **pages, int *nr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -2764,7 +2786,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
|
||||
if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
|
||||
PMD_SHIFT, next, flags, pages, nr))
|
||||
return 0;
|
||||
} else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
|
||||
} else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr))
|
||||
return 0;
|
||||
} while (pmdp++, addr = next, addr != end);
|
||||
|
||||
|
@ -2894,11 +2894,9 @@ static void split_huge_pages_all(void)
|
||||
max_zone_pfn = zone_end_pfn(zone);
|
||||
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
|
||||
int nr_pages;
|
||||
if (!pfn_valid(pfn))
|
||||
continue;
|
||||
|
||||
page = pfn_to_page(pfn);
|
||||
if (!get_page_unless_zero(page))
|
||||
page = pfn_to_online_page(pfn);
|
||||
if (!page || !get_page_unless_zero(page))
|
||||
continue;
|
||||
|
||||
if (zone != page_zone(page))
|
||||
|
14
mm/hugetlb.c
14
mm/hugetlb.c
@ -3420,6 +3420,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
|
||||
{
|
||||
int i, nid = page_to_nid(page);
|
||||
struct hstate *target_hstate;
|
||||
struct page *subpage;
|
||||
int rc = 0;
|
||||
|
||||
target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
|
||||
@ -3453,15 +3454,16 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
|
||||
mutex_lock(&target_hstate->resize_lock);
|
||||
for (i = 0; i < pages_per_huge_page(h);
|
||||
i += pages_per_huge_page(target_hstate)) {
|
||||
subpage = nth_page(page, i);
|
||||
if (hstate_is_gigantic(target_hstate))
|
||||
prep_compound_gigantic_page_for_demote(page + i,
|
||||
prep_compound_gigantic_page_for_demote(subpage,
|
||||
target_hstate->order);
|
||||
else
|
||||
prep_compound_page(page + i, target_hstate->order);
|
||||
set_page_private(page + i, 0);
|
||||
set_page_refcounted(page + i);
|
||||
prep_new_huge_page(target_hstate, page + i, nid);
|
||||
put_page(page + i);
|
||||
prep_compound_page(subpage, target_hstate->order);
|
||||
set_page_private(subpage, 0);
|
||||
set_page_refcounted(subpage);
|
||||
prep_new_huge_page(target_hstate, subpage, nid);
|
||||
put_page(subpage);
|
||||
}
|
||||
mutex_unlock(&target_hstate->resize_lock);
|
||||
|
||||
|
@ -1083,10 +1083,12 @@ static void collapse_huge_page(struct mm_struct *mm,
|
||||
|
||||
pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
|
||||
/*
|
||||
* After this gup_fast can't run anymore. This also removes
|
||||
* any huge TLB entry from the CPU so we won't allow
|
||||
* huge and small TLB entries for the same virtual address
|
||||
* to avoid the risk of CPU bugs in that area.
|
||||
* This removes any huge TLB entry from the CPU so we won't allow
|
||||
* huge and small TLB entries for the same virtual address to
|
||||
* avoid the risk of CPU bugs in that area.
|
||||
*
|
||||
* Parallel fast GUP is fine since fast GUP will back off when
|
||||
* it detects PMD is changed.
|
||||
*/
|
||||
_pmd = pmdp_collapse_flush(vma, address, pmd);
|
||||
spin_unlock(pmd_ptl);
|
||||
|
@ -451,8 +451,11 @@ regular_page:
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Do not interfere with other mappings of this page */
|
||||
if (page_mapcount(page) != 1)
|
||||
/*
|
||||
* Do not interfere with other mappings of this page and
|
||||
* non-LRU page.
|
||||
*/
|
||||
if (!PageLRU(page) || page_mapcount(page) != 1)
|
||||
continue;
|
||||
|
||||
VM_BUG_ON_PAGE(PageTransCompound(page), page);
|
||||
|
@ -345,13 +345,17 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
|
||||
* not much we can do. We just print a message and ignore otherwise.
|
||||
*/
|
||||
|
||||
#define FSDAX_INVALID_PGOFF ULONG_MAX
|
||||
|
||||
/*
|
||||
* Schedule a process for later kill.
|
||||
* Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
|
||||
*
|
||||
* Notice: @fsdax_pgoff is used only when @p is a fsdax page.
|
||||
* In other cases, such as anonymous and file-backend page, the address to be
|
||||
* killed can be caculated by @p itself.
|
||||
* Note: @fsdax_pgoff is used only when @p is a fsdax page and a
|
||||
* filesystem with a memory failure handler has claimed the
|
||||
* memory_failure event. In all other cases, page->index and
|
||||
* page->mapping are sufficient for mapping the page back to its
|
||||
* corresponding user virtual address.
|
||||
*/
|
||||
static void add_to_kill(struct task_struct *tsk, struct page *p,
|
||||
pgoff_t fsdax_pgoff, struct vm_area_struct *vma,
|
||||
@ -367,11 +371,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
|
||||
|
||||
tk->addr = page_address_in_vma(p, vma);
|
||||
if (is_zone_device_page(p)) {
|
||||
/*
|
||||
* Since page->mapping is not used for fsdax, we need
|
||||
* calculate the address based on the vma.
|
||||
*/
|
||||
if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
|
||||
if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
|
||||
tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
|
||||
tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
|
||||
} else
|
||||
@ -523,7 +523,8 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
|
||||
if (!page_mapped_in_vma(page, vma))
|
||||
continue;
|
||||
if (vma->vm_mm == t->mm)
|
||||
add_to_kill(t, page, 0, vma, to_kill);
|
||||
add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma,
|
||||
to_kill);
|
||||
}
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
@ -559,7 +560,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
|
||||
* to be informed of all such data corruptions.
|
||||
*/
|
||||
if (vma->vm_mm == t->mm)
|
||||
add_to_kill(t, page, 0, vma, to_kill);
|
||||
add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma,
|
||||
to_kill);
|
||||
}
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
@ -743,6 +745,9 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
|
||||
};
|
||||
priv.tk.tsk = p;
|
||||
|
||||
if (!p->mm)
|
||||
return -EFAULT;
|
||||
|
||||
mmap_read_lock(p->mm);
|
||||
ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
|
||||
(void *)&priv);
|
||||
@ -1928,7 +1933,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
|
||||
* Call driver's implementation to handle the memory failure, otherwise
|
||||
* fall back to generic handler.
|
||||
*/
|
||||
if (pgmap->ops->memory_failure) {
|
||||
if (pgmap_has_memory_failure(pgmap)) {
|
||||
rc = pgmap->ops->memory_failure(pgmap, pfn, 1, flags);
|
||||
/*
|
||||
* Fall back to generic handler too if operation is not
|
||||
|
20
mm/memory.c
20
mm/memory.c
@ -4386,14 +4386,20 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
|
||||
|
||||
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
|
||||
vmf->address, &vmf->ptl);
|
||||
ret = 0;
|
||||
/* Re-check under ptl */
|
||||
if (likely(!vmf_pte_changed(vmf)))
|
||||
do_set_pte(vmf, page, vmf->address);
|
||||
else
|
||||
ret = VM_FAULT_NOPAGE;
|
||||
|
||||
update_mmu_tlb(vma, vmf->address, vmf->pte);
|
||||
/* Re-check under ptl */
|
||||
if (likely(!vmf_pte_changed(vmf))) {
|
||||
do_set_pte(vmf, page, vmf->address);
|
||||
|
||||
/* no need to invalidate: a not-present page won't be cached */
|
||||
update_mmu_cache(vma, vmf->address, vmf->pte);
|
||||
|
||||
ret = 0;
|
||||
} else {
|
||||
update_mmu_tlb(vma, vmf->address, vmf->pte);
|
||||
ret = VM_FAULT_NOPAGE;
|
||||
}
|
||||
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
return ret;
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/memremap.h>
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/oom.h>
|
||||
@ -193,10 +194,10 @@ again:
|
||||
bool anon_exclusive;
|
||||
pte_t swp_pte;
|
||||
|
||||
flush_cache_page(vma, addr, pte_pfn(*ptep));
|
||||
anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
|
||||
if (anon_exclusive) {
|
||||
flush_cache_page(vma, addr, pte_pfn(*ptep));
|
||||
ptep_clear_flush(vma, addr, ptep);
|
||||
pte = ptep_clear_flush(vma, addr, ptep);
|
||||
|
||||
if (page_try_share_anon_rmap(page)) {
|
||||
set_pte_at(mm, addr, ptep, pte);
|
||||
@ -206,11 +207,15 @@ again:
|
||||
goto next;
|
||||
}
|
||||
} else {
|
||||
ptep_get_and_clear(mm, addr, ptep);
|
||||
pte = ptep_get_and_clear(mm, addr, ptep);
|
||||
}
|
||||
|
||||
migrate->cpages++;
|
||||
|
||||
/* Set the dirty flag on the folio now the pte is gone. */
|
||||
if (pte_dirty(pte))
|
||||
folio_mark_dirty(page_folio(page));
|
||||
|
||||
/* Setup special migration page table entry */
|
||||
if (mpfn & MIGRATE_PFN_WRITE)
|
||||
entry = make_writable_migration_entry(
|
||||
@ -254,13 +259,14 @@ next:
|
||||
migrate->dst[migrate->npages] = 0;
|
||||
migrate->src[migrate->npages++] = mpfn;
|
||||
}
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(ptep - 1, ptl);
|
||||
|
||||
/* Only flush the TLB if we actually modified any entries */
|
||||
if (unmapped)
|
||||
flush_tlb_range(walk->vma, start, end);
|
||||
|
||||
arch_leave_lazy_mmu_mode();
|
||||
pte_unmap_unlock(ptep - 1, ptl);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -4708,6 +4708,30 @@ void fs_reclaim_release(gfp_t gfp_mask)
|
||||
EXPORT_SYMBOL_GPL(fs_reclaim_release);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Zonelists may change due to hotplug during allocation. Detect when zonelists
|
||||
* have been rebuilt so allocation retries. Reader side does not lock and
|
||||
* retries the allocation if zonelist changes. Writer side is protected by the
|
||||
* embedded spin_lock.
|
||||
*/
|
||||
static DEFINE_SEQLOCK(zonelist_update_seq);
|
||||
|
||||
static unsigned int zonelist_iter_begin(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
|
||||
return read_seqbegin(&zonelist_update_seq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int check_retry_zonelist(unsigned int seq)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
|
||||
return read_seqretry(&zonelist_update_seq, seq);
|
||||
|
||||
return seq;
|
||||
}
|
||||
|
||||
/* Perform direct synchronous page reclaim */
|
||||
static unsigned long
|
||||
__perform_reclaim(gfp_t gfp_mask, unsigned int order,
|
||||
@ -5001,6 +5025,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
int compaction_retries;
|
||||
int no_progress_loops;
|
||||
unsigned int cpuset_mems_cookie;
|
||||
unsigned int zonelist_iter_cookie;
|
||||
int reserve_flags;
|
||||
|
||||
/*
|
||||
@ -5011,11 +5036,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
|
||||
(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
|
||||
gfp_mask &= ~__GFP_ATOMIC;
|
||||
|
||||
retry_cpuset:
|
||||
restart:
|
||||
compaction_retries = 0;
|
||||
no_progress_loops = 0;
|
||||
compact_priority = DEF_COMPACT_PRIORITY;
|
||||
cpuset_mems_cookie = read_mems_allowed_begin();
|
||||
zonelist_iter_cookie = zonelist_iter_begin();
|
||||
|
||||
/*
|
||||
* The fast path uses conservative alloc_flags to succeed only until
|
||||
@ -5187,9 +5213,13 @@ retry:
|
||||
goto retry;
|
||||
|
||||
|
||||
/* Deal with possible cpuset update races before we start OOM killing */
|
||||
if (check_retry_cpuset(cpuset_mems_cookie, ac))
|
||||
goto retry_cpuset;
|
||||
/*
|
||||
* Deal with possible cpuset update races or zonelist updates to avoid
|
||||
* a unnecessary OOM kill.
|
||||
*/
|
||||
if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
|
||||
check_retry_zonelist(zonelist_iter_cookie))
|
||||
goto restart;
|
||||
|
||||
/* Reclaim has failed us, start killing things */
|
||||
page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
|
||||
@ -5209,9 +5239,13 @@ retry:
|
||||
}
|
||||
|
||||
nopage:
|
||||
/* Deal with possible cpuset update races before we fail */
|
||||
if (check_retry_cpuset(cpuset_mems_cookie, ac))
|
||||
goto retry_cpuset;
|
||||
/*
|
||||
* Deal with possible cpuset update races or zonelist updates to avoid
|
||||
* a unnecessary OOM kill.
|
||||
*/
|
||||
if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
|
||||
check_retry_zonelist(zonelist_iter_cookie))
|
||||
goto restart;
|
||||
|
||||
/*
|
||||
* Make sure that __GFP_NOFAIL request doesn't leak out and make sure
|
||||
@ -5706,6 +5740,18 @@ refill:
|
||||
/* reset page count bias and offset to start of new frag */
|
||||
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
|
||||
offset = size - fragsz;
|
||||
if (unlikely(offset < 0)) {
|
||||
/*
|
||||
* The caller is trying to allocate a fragment
|
||||
* with fragsz > PAGE_SIZE but the cache isn't big
|
||||
* enough to satisfy the request, this may
|
||||
* happen in low memory conditions.
|
||||
* We don't release the cache page because
|
||||
* it could make memory pressure worse
|
||||
* so we simply return NULL here.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
nc->pagecnt_bias--;
|
||||
@ -6514,9 +6560,8 @@ static void __build_all_zonelists(void *data)
|
||||
int nid;
|
||||
int __maybe_unused cpu;
|
||||
pg_data_t *self = data;
|
||||
static DEFINE_SPINLOCK(lock);
|
||||
|
||||
spin_lock(&lock);
|
||||
write_seqlock(&zonelist_update_seq);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
memset(node_load, 0, sizeof(node_load));
|
||||
@ -6553,7 +6598,7 @@ static void __build_all_zonelists(void *data)
|
||||
#endif
|
||||
}
|
||||
|
||||
spin_unlock(&lock);
|
||||
write_sequnlock(&zonelist_update_seq);
|
||||
}
|
||||
|
||||
static noinline void __init
|
||||
|
@ -288,6 +288,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
|
||||
* @isolate_before: isolate the pageblock before the boundary_pfn
|
||||
* @skip_isolation: the flag to skip the pageblock isolation in second
|
||||
* isolate_single_pageblock()
|
||||
* @migratetype: migrate type to set in error recovery.
|
||||
*
|
||||
* Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
|
||||
* pageblock. When not all pageblocks within a page are isolated at the same
|
||||
@ -302,9 +303,9 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
|
||||
* the in-use page then splitting the free page.
|
||||
*/
|
||||
static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
|
||||
gfp_t gfp_flags, bool isolate_before, bool skip_isolation)
|
||||
gfp_t gfp_flags, bool isolate_before, bool skip_isolation,
|
||||
int migratetype)
|
||||
{
|
||||
unsigned char saved_mt;
|
||||
unsigned long start_pfn;
|
||||
unsigned long isolate_pageblock;
|
||||
unsigned long pfn;
|
||||
@ -328,13 +329,13 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
|
||||
start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
|
||||
zone->zone_start_pfn);
|
||||
|
||||
saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
|
||||
if (skip_isolation) {
|
||||
int mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
|
||||
|
||||
if (skip_isolation)
|
||||
VM_BUG_ON(!is_migrate_isolate(saved_mt));
|
||||
else {
|
||||
ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags,
|
||||
isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
|
||||
VM_BUG_ON(!is_migrate_isolate(mt));
|
||||
} else {
|
||||
ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype,
|
||||
flags, isolate_pageblock, isolate_pageblock + pageblock_nr_pages);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -475,7 +476,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
|
||||
failed:
|
||||
/* restore the original migratetype */
|
||||
if (!skip_isolation)
|
||||
unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt);
|
||||
unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
@ -537,7 +538,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
|
||||
bool skip_isolation = false;
|
||||
|
||||
/* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */
|
||||
ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation);
|
||||
ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false,
|
||||
skip_isolation, migratetype);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -545,7 +547,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
|
||||
skip_isolation = true;
|
||||
|
||||
/* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */
|
||||
ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation);
|
||||
ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true,
|
||||
skip_isolation, migratetype);
|
||||
if (ret) {
|
||||
unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
|
||||
return ret;
|
||||
|
@ -285,7 +285,7 @@ static int secretmem_init(void)
|
||||
|
||||
secretmem_mnt = kern_mount(&secretmem_fs);
|
||||
if (IS_ERR(secretmem_mnt))
|
||||
ret = PTR_ERR(secretmem_mnt);
|
||||
return PTR_ERR(secretmem_mnt);
|
||||
|
||||
/* prevent secretmem mappings from ever getting PROT_EXEC */
|
||||
secretmem_mnt->mnt_flags |= MNT_NOEXEC;
|
||||
|
@ -151,7 +151,7 @@ void __delete_from_swap_cache(struct folio *folio,
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
void *entry = xas_store(&xas, shadow);
|
||||
VM_BUG_ON_FOLIO(entry != folio, folio);
|
||||
VM_BUG_ON_PAGE(entry != folio, entry);
|
||||
set_page_private(folio_page(folio, i), 0);
|
||||
xas_next(&xas);
|
||||
}
|
||||
|
@ -2550,8 +2550,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
|
||||
}
|
||||
|
||||
if (unlikely(buffer_heads_over_limit)) {
|
||||
if (folio_get_private(folio) && folio_trylock(folio)) {
|
||||
if (folio_get_private(folio))
|
||||
if (folio_test_private(folio) && folio_trylock(folio)) {
|
||||
if (folio_test_private(folio))
|
||||
filemap_release_folio(folio, 0);
|
||||
folio_unlock(folio);
|
||||
}
|
||||
|
@ -3,26 +3,7 @@
|
||||
#define _TOOLS_INCLUDE_LINUX_GFP_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define __GFP_BITS_SHIFT 26
|
||||
#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
|
||||
|
||||
#define __GFP_HIGH 0x20u
|
||||
#define __GFP_IO 0x40u
|
||||
#define __GFP_FS 0x80u
|
||||
#define __GFP_NOWARN 0x200u
|
||||
#define __GFP_ZERO 0x8000u
|
||||
#define __GFP_ATOMIC 0x80000u
|
||||
#define __GFP_ACCOUNT 0x100000u
|
||||
#define __GFP_DIRECT_RECLAIM 0x400000u
|
||||
#define __GFP_KSWAPD_RECLAIM 0x2000000u
|
||||
|
||||
#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)
|
||||
|
||||
#define GFP_ZONEMASK 0x0fu
|
||||
#define GFP_ATOMIC (__GFP_HIGH | __GFP_ATOMIC | __GFP_KSWAPD_RECLAIM)
|
||||
#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
|
||||
#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
|
||||
#include <linux/gfp_types.h>
|
||||
|
||||
static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
|
||||
{
|
||||
|
1
tools/include/linux/gfp_types.h
Normal file
1
tools/include/linux/gfp_types.h
Normal file
@ -0,0 +1 @@
|
||||
#include "../../../include/linux/gfp_types.h"
|
Loading…
Reference in New Issue
Block a user