Merge branch 'akpm' (patches from Andrew)

Merge misx fixes from Andrew Morton:
 "31 patches.

  Subsystems affected by this patch series: hotfixes, mm/pagealloc,
  kexec, ocfs2, lib, mm/slab, mm/slab, mm/slub, mm/swap, mm/pagemap,
  mm/vmalloc, mm/memcg, mm/gup, mm/thp, mm/vmscan, x86,
  mm/memory-hotplug, MAINTAINERS"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (31 commits)
  MAINTAINERS: update info for sparse
  mm/memory_hotplug.c: fix false softlockup during pfn range removal
  mm: remove vmalloc_exec
  arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page
  x86/hyperv: allocate the hypercall page with only read and execute bits
  mm/memory: fix IO cost for anonymous page
  mm/swap: fix for "mm: workingset: age nonresident information alongside anonymous pages"
  mm: workingset: age nonresident information alongside anonymous pages
  doc: THP CoW fault no longer allocate THP
  docs: mm/gup: minor documentation update
  mm/memcontrol.c: prevent missed memory.low load tears
  mm/memcontrol.c: add missed css_put()
  mm: memcontrol: handle div0 crash race condition in memory.low
  mm/vmalloc.c: fix a warning while make xmldocs
  media: omap3isp: remove cacheflush.h
  make asm-generic/cacheflush.h more standalone
  mm/debug_vm_pgtable: fix build failure with powerpc 8xx
  mm/memory.c: properly pte_offset_map_lock/unlock in vm_insert_pages()
  mm: fix swap cache node allocation mask
  slub: cure list_slab_objects() from double fix
  ...
This commit is contained in:
Linus Torvalds 2020-06-26 12:19:36 -07:00
commit 7c902e2730
35 changed files with 165 additions and 161 deletions

View File

@ -1356,8 +1356,8 @@ PAGE_SIZE multiple when read back.
thp_fault_alloc
Number of transparent hugepages which were allocated to satisfy
a page fault, including COW faults. This counter is not present
when CONFIG_TRANSPARENT_HUGEPAGE is not set.
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
is not set.
thp_collapse_alloc
Number of transparent hugepages which were allocated to allow

View File

@ -305,8 +305,7 @@ monitor how successfully the system is providing huge pages for use.
thp_fault_alloc
is incremented every time a huge page is successfully
allocated to handle a page fault. This applies to both the
first time a page is faulted and for COW faults.
allocated to handle a page fault.
thp_collapse_alloc
is incremented by khugepaged when it has found

View File

@ -33,7 +33,7 @@ all combinations of get*(), pin*(), FOLL_LONGTERM, and more. Also, the
pin_user_pages*() APIs are clearly distinct from the get_user_pages*() APIs, so
that's a natural dividing line, and a good point to make separate wrapper calls.
In other words, use pin_user_pages*() for DMA-pinned pages, and
get_user_pages*() for other cases. There are four cases described later on in
get_user_pages*() for other cases. There are five cases described later on in
this document, to further clarify that concept.
FOLL_PIN and FOLL_GET are mutually exclusive for a given gup call. However,

View File

@ -16058,8 +16058,10 @@ SPARSE CHECKER
M: "Luc Van Oostenryck" <luc.vanoostenryck@gmail.com>
L: linux-sparse@vger.kernel.org
S: Maintained
W: https://sparse.wiki.kernel.org/
W: https://sparse.docs.kernel.org/
T: git git://git.kernel.org/pub/scm/devel/sparse/sparse.git
Q: https://patchwork.kernel.org/project/linux-sparse/list/
B: https://bugzilla.kernel.org/enter_bug.cgi?component=Sparse&product=Tools
F: include/linux/compiler.h
SPEAR CLOCK FRAMEWORK SUPPORT

View File

@ -120,15 +120,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
void *alloc_insn_page(void)
{
void *page;
page = vmalloc_exec(PAGE_SIZE);
if (page) {
set_memory_ro((unsigned long)page, 1);
set_vm_flush_reset_perms(page);
}
return page;
return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
NUMA_NO_NODE, __func__);
}
/* arm kprobe: install breakpoint in text */

View File

@ -74,8 +74,11 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size)
* We need to iterate through the pages, clearing the dcache for
* them and setting the cache-inhibit bit.
*/
mmap_read_lock(&init_mm);
error = walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops,
NULL);
mmap_read_unlock(&init_mm);
if (error)
return ERR_PTR(error);
return cpu_addr;
@ -85,9 +88,11 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size)
{
unsigned long va = (unsigned long)cpu_addr;
mmap_read_lock(&init_mm);
/* walk_page_range shouldn't be able to fail here */
WARN_ON(walk_page_range(&init_mm, va, va + size,
&clear_nocache_walk_ops, NULL));
mmap_read_unlock(&init_mm);
}
void arch_sync_dma_for_device(phys_addr_t addr, size_t size,

View File

@ -375,7 +375,9 @@ void __init hyperv_init(void)
guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
hv_hypercall_pg = vmalloc_exec(PAGE_SIZE);
hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __func__);
if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
goto remove_cpuhp_state;

View File

@ -194,6 +194,7 @@ enum page_cache_mode {
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G)
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
@ -219,6 +220,7 @@ enum page_cache_mode {
#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC)
#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)

View File

@ -39,8 +39,6 @@
* Troy Laramy <t-laramy@ti.com>
*/
#include <asm/cacheflush.h>
#include <linux/clk.h>
#include <linux/clkdev.h>
#include <linux/delay.h>

View File

@ -18,7 +18,6 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <media/v4l2-dev.h>
#include <media/v4l2-ioctl.h>

View File

@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
&ocfs2_nfs_sync_lops, osb);
}
static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
{
ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
init_rwsem(&osb->nfs_sync_rwlock);
}
void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
{
struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
if (ocfs2_is_hard_readonly(osb))
return -EROFS;
if (ex)
down_write(&osb->nfs_sync_rwlock);
else
down_read(&osb->nfs_sync_rwlock);
if (ocfs2_mount_local(osb))
return 0;
@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
if (!ocfs2_mount_local(osb))
ocfs2_cluster_unlock(osb, lockres,
ex ? LKM_EXMODE : LKM_PRMODE);
if (ex)
up_write(&osb->nfs_sync_rwlock);
else
up_read(&osb->nfs_sync_rwlock);
}
int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
ocfs2_nfs_sync_lock_init(osb);
ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
osb->cconn = conn;

View File

@ -395,6 +395,7 @@ struct ocfs2_super
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
struct ocfs2_lock_res osb_nfs_sync_lockres;
struct rw_semaphore nfs_sync_rwlock;
struct ocfs2_lock_res osb_trim_fs_lockres;
struct mutex obs_trim_fs_mutex;
struct ocfs2_dlm_debug *osb_dlm_debug;

View File

@ -290,7 +290,7 @@
#define OCFS2_MAX_SLOTS 255
/* Slot map indicator for an empty slot */
#define OCFS2_INVALID_SLOT -1
#define OCFS2_INVALID_SLOT ((u16)-1)
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
@ -326,8 +326,8 @@ struct ocfs2_system_inode_info {
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
SLOT_MAP_SYSTEM_INODE,
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
USER_QUOTA_SYSTEM_INODE,

View File

@ -2825,9 +2825,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
goto bail;
}
inode_alloc_inode =
ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
suballoc_slot);
if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
inode_alloc_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
else
inode_alloc_inode = ocfs2_get_system_file_inode(osb,
INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
if (!inode_alloc_inode) {
/* the error code could be inaccurate, but we are not able to
* get the correct one. */

View File

@ -2,6 +2,11 @@
#ifndef _ASM_GENERIC_CACHEFLUSH_H
#define _ASM_GENERIC_CACHEFLUSH_H
struct mm_struct;
struct vm_area_struct;
struct page;
struct address_space;
/*
* The cache doesn't need to be flushed when TLB entries change when
* the cache is mapped to physical memory, not virtual memory

View File

@ -257,8 +257,8 @@ struct lruvec {
*/
unsigned long anon_cost;
unsigned long file_cost;
/* Evictions & activations on the inactive file list */
atomic_long_t inactive_age;
/* Non-resident age, driven by LRU movement */
atomic_long_t nonresident_age;
/* Refaults at the time of last reclaim cycle */
unsigned long refaults;
/* Various lruvec state flags (enum lruvec_flags) */

View File

@ -313,6 +313,7 @@ struct vma_swap_readahead {
};
/* linux/mm/workingset.c */
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
void workingset_refault(struct page *page, void *shadow);
void workingset_activation(struct page *page);

View File

@ -106,7 +106,6 @@ extern void *vzalloc(unsigned long size);
extern void *vmalloc_user(unsigned long size);
extern void *vmalloc_node(unsigned long size, int node);
extern void *vzalloc_node(unsigned long size, int node);
extern void *vmalloc_exec(unsigned long size);
extern void *vmalloc_32(unsigned long size);
extern void *vmalloc_32_user(unsigned long size);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask);

View File

@ -181,34 +181,19 @@ void kimage_file_post_load_cleanup(struct kimage *image)
static int
kimage_validate_signature(struct kimage *image)
{
const char *reason;
int ret;
ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
image->kernel_buf_len);
switch (ret) {
case 0:
break;
if (ret) {
/* Certain verification errors are non-fatal if we're not
* checking errors, provided we aren't mandating that there
* must be a valid signature.
*/
case -ENODATA:
reason = "kexec of unsigned image";
goto decide;
case -ENOPKG:
reason = "kexec of image with unsupported crypto";
goto decide;
case -ENOKEY:
reason = "kexec of image with unavailable key";
decide:
if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
pr_notice("%s rejected\n", reason);
pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
return ret;
}
/* If IMA is guaranteed to appraise a signature on the kexec
/*
* If IMA is guaranteed to appraise a signature on the kexec
* image, permit it even if the kernel is otherwise locked
* down.
*/
@ -216,17 +201,10 @@ kimage_validate_signature(struct kimage *image)
security_locked_down(LOCKDOWN_KEXEC))
return -EPERM;
return 0;
/* All other errors are fatal, including nomem, unparseable
* signatures and signature check failures - even if signatures
* aren't required.
*/
default:
pr_notice("kernel signature verification failed (%d).\n", ret);
pr_debug("kernel signature verification failed (%d).\n", ret);
}
return ret;
return 0;
}
#endif

View File

@ -2783,7 +2783,9 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
void * __weak module_alloc(unsigned long size)
{
return vmalloc_exec(size);
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
NUMA_NO_NODE, __func__);
}
bool __weak module_init_section(const char *name)

View File

@ -520,8 +520,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
err_free:
kfree(devmem);
err_release:
release_mem_region(devmem->pagemap.res.start,
resource_size(&devmem->pagemap.res));
release_mem_region(res->start, resource_size(res));
err:
mutex_unlock(&mdevice->devmem_lock);
return false;

View File

@ -2316,15 +2316,26 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
.page = NULL,
};
current->capture_control = &capc;
/*
* Make sure the structs are really initialized before we expose the
* capture control, in case we are interrupted and the interrupt handler
* frees a page.
*/
barrier();
WRITE_ONCE(current->capture_control, &capc);
ret = compact_zone(&cc, &capc);
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
*capture = capc.page;
current->capture_control = NULL;
/*
* Make sure we hide capture control first before we read the captured
* page pointer, otherwise an interrupt could free and capture a page
* and we would leak it.
*/
WRITE_ONCE(current->capture_control, NULL);
*capture = READ_ONCE(capc.page);
return ret;
}

View File

@ -246,13 +246,13 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
unsigned long vaddr)
{
pte_t pte = READ_ONCE(*ptep);
pte_t pte = ptep_get(ptep);
pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
set_pte_at(mm, vaddr, ptep, pte);
barrier();
pte_clear(mm, vaddr, ptep);
pte = READ_ONCE(*ptep);
pte = ptep_get(ptep);
WARN_ON(!pte_none(pte));
}

View File

@ -2772,8 +2772,10 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
return;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
if (!cw)
if (!cw) {
css_put(&memcg->css);
return;
}
cw->memcg = memcg;
cw->cachep = cachep;
@ -6360,11 +6362,16 @@ static unsigned long effective_protection(unsigned long usage,
* We're using unprotected memory for the weight so that if
* some cgroups DO claim explicit protection, we don't protect
* the same bytes twice.
*
* Check both usage and parent_usage against the respective
* protected values. One should imply the other, but they
* aren't read atomically - make sure the division is sane.
*/
if (!(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT))
return ep;
if (parent_effective > siblings_protected && usage > protected) {
if (parent_effective > siblings_protected &&
parent_usage > siblings_protected &&
usage > protected) {
unsigned long unclaimed;
unclaimed = parent_effective - siblings_protected;
@ -6416,7 +6423,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
if (parent == root) {
memcg->memory.emin = READ_ONCE(memcg->memory.min);
memcg->memory.elow = memcg->memory.low;
memcg->memory.elow = READ_ONCE(memcg->memory.low);
goto out;
}
@ -6428,7 +6435,8 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
atomic_long_read(&parent->memory.children_min_usage)));
WRITE_ONCE(memcg->memory.elow, effective_protection(usage, parent_usage,
memcg->memory.low, READ_ONCE(parent->memory.elow),
READ_ONCE(memcg->memory.low),
READ_ONCE(parent->memory.elow),
atomic_long_read(&parent->memory.children_low_usage)));
out:

View File

@ -1498,7 +1498,7 @@ out:
}
#ifdef pte_index
static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
unsigned long addr, struct page *page, pgprot_t prot)
{
int err;
@ -1506,8 +1506,9 @@ static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
if (!page_count(page))
return -EINVAL;
err = validate_page_before_insert(page);
return err ? err : insert_page_into_pte_locked(
mm, pte_offset_map(pmd, addr), addr, page, prot);
if (err)
return err;
return insert_page_into_pte_locked(mm, pte, addr, page, prot);
}
/* insert_pages() amortizes the cost of spinlock operations
@ -1517,7 +1518,8 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num, pgprot_t prot)
{
pmd_t *pmd = NULL;
spinlock_t *pte_lock = NULL;
pte_t *start_pte, *pte;
spinlock_t *pte_lock;
struct mm_struct *const mm = vma->vm_mm;
unsigned long curr_page_idx = 0;
unsigned long remaining_pages_total = *num;
@ -1536,18 +1538,17 @@ more:
ret = -ENOMEM;
if (pte_alloc(mm, pmd))
goto out;
pte_lock = pte_lockptr(mm, pmd);
while (pages_to_write_in_pmd) {
int pte_idx = 0;
const int batch_size = min_t(int, pages_to_write_in_pmd, 8);
spin_lock(pte_lock);
for (; pte_idx < batch_size; ++pte_idx) {
int err = insert_page_in_batch_locked(mm, pmd,
start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock);
for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) {
int err = insert_page_in_batch_locked(mm, pte,
addr, pages[curr_page_idx], prot);
if (unlikely(err)) {
spin_unlock(pte_lock);
pte_unmap_unlock(start_pte, pte_lock);
ret = err;
remaining_pages_total -= pte_idx;
goto out;
@ -1555,7 +1556,7 @@ more:
addr += PAGE_SIZE;
++curr_page_idx;
}
spin_unlock(pte_lock);
pte_unmap_unlock(start_pte, pte_lock);
pages_to_write_in_pmd -= batch_size;
remaining_pages_total -= batch_size;
}
@ -3140,8 +3141,18 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
err = mem_cgroup_charge(page, vma->vm_mm,
GFP_KERNEL);
ClearPageSwapCache(page);
if (err)
if (err) {
ret = VM_FAULT_OOM;
goto out_page;
}
/*
* XXX: Move to lru_cache_add() when it
* supports new vs putback
*/
spin_lock_irq(&page_pgdat(page)->lru_lock);
lru_note_cost_page(page);
spin_unlock_irq(&page_pgdat(page)->lru_lock);
lru_cache_add(page);
swap_readpage(page, true);

View File

@ -471,11 +471,20 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages)
{
const unsigned long end_pfn = start_pfn + nr_pages;
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long flags;
unsigned long pfn, cur_nr_pages, flags;
/* Poison struct pages because they are now uninitialized again. */
page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
cond_resched();
/* Select all remaining pages up to the next section boundary */
cur_nr_pages =
min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
page_init_poison(pfn_to_page(pfn),
sizeof(struct page) * cur_nr_pages);
}
#ifdef CONFIG_ZONE_DEVICE
/*

View File

@ -290,23 +290,6 @@ void *vzalloc_node(unsigned long size, int node)
}
EXPORT_SYMBOL(vzalloc_node);
/**
* vmalloc_exec - allocate virtually contiguous, executable memory
* @size: allocation size
*
* Kernel-internal function to allocate enough pages to cover @size
* the page level allocator and map them into contiguous and
* executable kernel virtual space.
*
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
void *vmalloc_exec(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM);
}
/**
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
* @size: allocation size

View File

@ -348,7 +348,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
gfp_t gfp, int order,
struct kmem_cache *s)
{
unsigned int nr_pages = 1 << order;
int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
int ret;
@ -388,7 +388,7 @@ out:
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
unsigned int nr_pages = 1 << order;
int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;

View File

@ -1726,7 +1726,7 @@ void kzfree(const void *p)
if (unlikely(ZERO_OR_NULL_PTR(mem)))
return;
ks = ksize(mem);
memset(mem, 0, ks);
memzero_explicit(mem, ks);
kfree(mem);
}
EXPORT_SYMBOL(kzfree);

View File

@ -3766,15 +3766,13 @@ error:
}
static void list_slab_objects(struct kmem_cache *s, struct page *page,
const char *text, unsigned long *map)
const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
unsigned long *map;
void *p;
if (!map)
return;
slab_err(s, page, text, s->name);
slab_lock(page);
@ -3786,6 +3784,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
print_tracking(s, p);
}
}
put_map(map);
slab_unlock(page);
#endif
}
@ -3799,11 +3798,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
LIST_HEAD(discard);
struct page *page, *h;
unsigned long *map = NULL;
#ifdef CONFIG_SLUB_DEBUG
map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
#endif
BUG_ON(irqs_disabled());
spin_lock_irq(&n->list_lock);
@ -3813,16 +3807,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
list_add(&page->slab_list, &discard);
} else {
list_slab_objects(s, page,
"Objects remaining in %s on __kmem_cache_shutdown()",
map);
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
spin_unlock_irq(&n->list_lock);
#ifdef CONFIG_SLUB_DEBUG
bitmap_free(map);
#endif
list_for_each_entry_safe(page, h, &discard, slab_list)
discard_slab(s, page);
}

View File

@ -443,7 +443,6 @@ void mark_page_accessed(struct page *page)
else
__lru_cache_activate_page(page);
ClearPageReferenced(page);
if (page_is_file_lru(page))
workingset_activation(page);
}
if (page_is_idle(page))

View File

@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}

View File

@ -1862,7 +1862,6 @@ EXPORT_SYMBOL(vm_unmap_ram);
* @pages: an array of pointers to the pages to be mapped
* @count: number of pages
* @node: prefer to allocate data structures on this node
* @prot: memory protection to use. PAGE_KERNEL for regular RAM
*
* If you use this function for less than VMAP_MAX_ALLOC pages, it could be
* faster than vmap so it's good. But if you mix long-life and short-life
@ -2696,26 +2695,6 @@ void *vzalloc_node(unsigned long size, int node)
}
EXPORT_SYMBOL(vzalloc_node);
/**
* vmalloc_exec - allocate virtually contiguous, executable memory
* @size: allocation size
*
* Kernel-internal function to allocate enough pages to cover @size
* the page level allocator and map them into contiguous and
* executable kernel virtual space.
*
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*
* Return: pointer to the allocated memory or %NULL on error
*/
void *vmalloc_exec(unsigned long size)
{
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
NUMA_NO_NODE, __builtin_return_address(0));
}
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)

View File

@ -904,6 +904,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
__delete_from_swap_cache(page, swap);
xa_unlock_irqrestore(&mapping->i_pages, flags);
put_swap_page(page, swap);
workingset_eviction(page, target_memcg);
} else {
void (*freepage)(struct page *);
void *shadow = NULL;
@ -1884,6 +1885,8 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
list_add(&page->lru, &pages_to_free);
} else {
nr_moved += nr_pages;
if (PageActive(page))
workingset_age_nonresident(lruvec, nr_pages);
}
}

View File

@ -156,8 +156,8 @@
*
* Implementation
*
* For each node's file LRU lists, a counter for inactive evictions
* and activations is maintained (node->inactive_age).
* For each node's LRU lists, a counter for inactive evictions and
* activations is maintained (node->nonresident_age).
*
* On eviction, a snapshot of this counter (along with some bits to
* identify the node) is stored in the now empty page cache
@ -213,7 +213,17 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
*workingsetp = workingset;
}
static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
/**
* workingset_age_nonresident - age non-resident entries as LRU ages
* @memcg: the lruvec that was aged
* @nr_pages: the number of pages to count
*
* As in-memory pages are aged, non-resident pages need to be aged as
* well, in order for the refault distances later on to be comparable
* to the in-memory dimensions. This function allows reclaim and LRU
* operations to drive the non-resident aging along in parallel.
*/
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
{
/*
* Reclaiming a cgroup means reclaiming all its children in a
@ -227,11 +237,8 @@ static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
* the root cgroup's, age as well.
*/
do {
struct lruvec *lruvec;
lruvec = mem_cgroup_lruvec(memcg, pgdat);
atomic_long_inc(&lruvec->inactive_age);
} while (memcg && (memcg = parent_mem_cgroup(memcg)));
atomic_long_add(nr_pages, &lruvec->nonresident_age);
} while ((lruvec = parent_lruvec(lruvec)));
}
/**
@ -254,12 +261,11 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
advance_inactive_age(page_memcg(page), pgdat);
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
/* XXX: target_memcg can be NULL, go through lruvec */
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
eviction = atomic_long_read(&lruvec->inactive_age);
eviction = atomic_long_read(&lruvec->nonresident_age);
return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
}
@ -309,20 +315,20 @@ void workingset_refault(struct page *page, void *shadow)
if (!mem_cgroup_disabled() && !eviction_memcg)
goto out;
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->inactive_age);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
/*
* Calculate the refault distance
*
* The unsigned subtraction here gives an accurate distance
* across inactive_age overflows in most cases. There is a
* across nonresident_age overflows in most cases. There is a
* special case: usually, shadow entries have a short lifetime
* and are either refaulted or reclaimed along with the inode
* before they get too old. But it is not impossible for the
* inactive_age to lap a shadow entry in the field, which can
* then result in a false small refault distance, leading to a
* false activation should this old entry actually refault
* again. However, earlier kernels used to deactivate
* nonresident_age to lap a shadow entry in the field, which
* can then result in a false small refault distance, leading
* to a false activation should this old entry actually
* refault again. However, earlier kernels used to deactivate
* unconditionally with *every* reclaim invocation for the
* longest time, so the occasional inappropriate activation
* leading to pressure on the active list is not a problem.
@ -359,7 +365,7 @@ void workingset_refault(struct page *page, void *shadow)
goto out;
SetPageActive(page);
advance_inactive_age(memcg, pgdat);
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
/* Page was active prior to eviction */
@ -382,6 +388,7 @@ out:
void workingset_activation(struct page *page)
{
struct mem_cgroup *memcg;
struct lruvec *lruvec;
rcu_read_lock();
/*
@ -394,7 +401,8 @@ void workingset_activation(struct page *page)
memcg = page_memcg_rcu(page);
if (!mem_cgroup_disabled() && !memcg)
goto out;
advance_inactive_age(memcg, page_pgdat(page));
lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
workingset_age_nonresident(lruvec, hpage_nr_pages(page));
out:
rcu_read_unlock();
}