Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
 "26 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (26 commits)
  MAINTAINERS: add Dan Streetman to zbud maintainers
  MAINTAINERS: add Dan Streetman to zswap maintainers
  mm: do not export ioremap_page_range symbol for external module
  mn10300: fix build error of missing fpu_save()
  romfs: use different way to generate fsid for BLOCK or MTD
  frv: add missing atomic64 operations
  mm, page_alloc: fix premature OOM when racing with cpuset mems update
  mm, page_alloc: move cpuset seqcount checking to slowpath
  mm, page_alloc: fix fast-path race with cpuset update or removal
  mm, page_alloc: fix check for NULL preferred_zone
  kernel/panic.c: add missing \n
  fbdev: color map copying bounds checking
  frv: add atomic64_add_unless()
  mm/mempolicy.c: do not put mempolicy before using its nodemask
  radix-tree: fix private list warnings
  Documentation/filesystems/proc.txt: add VmPin
  mm, memcg: do not retry precharge charges
  proc: add a schedule point in proc_pid_readdir()
  mm: alloc_contig: re-allow CMA to compact FS pages
  mm/slub.c: trace free objects at KERN_INFO
  ...
commit 883af14e67
@@ -212,10 +212,11 @@ asynchronous manner and the value may not be very precise. To see a precise
 snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
 It's slow but very precise.
 
-Table 1-2: Contents of the status files (as of 4.1)
+Table 1-2: Contents of the status files (as of 4.8)
 ..............................................................................
  Field                       Content
  Name                        filename of the executable
+ Umask                       file mode creation mask
  State                       state (R is running, S is sleeping, D is sleeping
                              in an uninterruptible wait, Z is zombie,
                              T is traced or stopped)
@@ -226,7 +227,6 @@ Table 1-2: Contents of the status files (as of 4.1)
  TracerPid                   PID of process tracing this process (0 if not)
  Uid                         Real, effective, saved set, and file system UIDs
  Gid                         Real, effective, saved set, and file system GIDs
- Umask                       file mode creation mask
  FDSize                      number of file descriptor slots currently allocated
  Groups                      supplementary group list
  NStgid                      descendant namespace thread group ID hierarchy
@@ -236,6 +236,7 @@ Table 1-2: Contents of the status files (as of 4.1)
  VmPeak                      peak virtual memory size
  VmSize                      total program size
  VmLck                       locked memory size
+ VmPin                       pinned memory size
  VmHWM                       peak resident set size ("high water mark")
  VmRSS                       size of memory portions. It contains the three
                              following parts (VmRSS = RssAnon + RssFile + RssShmem)
@@ -13625,6 +13625,7 @@ F: drivers/net/hamradio/z8530.h
 
 ZBUD COMPRESSED PAGE ALLOCATOR
 M: Seth Jennings <sjenning@redhat.com>
+M: Dan Streetman <ddstreet@ieee.org>
 L: linux-mm@kvack.org
 S: Maintained
 F: mm/zbud.c
@@ -13680,6 +13681,7 @@ F: Documentation/vm/zsmalloc.txt
 
 ZSWAP COMPRESSED SWAP CACHING
 M: Seth Jennings <sjenning@redhat.com>
+M: Dan Streetman <ddstreet@ieee.org>
 L: linux-mm@kvack.org
 S: Maintained
 F: mm/zswap.c
@@ -139,7 +139,7 @@ static inline void atomic64_dec(atomic64_t *v)
 #define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0)
 #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
 #define atomic64_inc_and_test(v) (atomic64_inc_return((v)) == 0)
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
 #define atomic_cmpxchg(v, old, new) (cmpxchg(&(v)->counter, old, new))
 #define atomic_xchg(v, new) (xchg(&(v)->counter, new))
@@ -161,6 +161,39 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
         return c;
 }
 
+static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
+{
+        long long c, old;
+
+        c = atomic64_read(v);
+        for (;;) {
+                if (unlikely(c == u))
+                        break;
+                old = atomic64_cmpxchg(v, c, c + i);
+                if (likely(old == c))
+                        break;
+                c = old;
+        }
+        return c != u;
+}
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+        long long c, old, dec;
+
+        c = atomic64_read(v);
+        for (;;) {
+                dec = c - 1;
+                if (unlikely(dec < 0))
+                        break;
+                old = atomic64_cmpxchg((v), c, dec);
+                if (likely(old == c))
+                        break;
+                c = old;
+        }
+        return dec;
+}
+
 #define ATOMIC_OP(op) \
 static inline int atomic_fetch_##op(int i, atomic_t *v) \
 { \
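The atomic64_add_unless() and atomic64_dec_if_positive() helpers added above are the usual compare-and-swap retry loops built on atomic64_cmpxchg(). For readers unfamiliar with the idiom, here is a minimal userspace sketch of the same add-unless pattern written against C11 <stdatomic.h>; the add_unless() name and the test harness are illustrative only and are not part of the patch.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Add @i to *v unless *v equals @u; returns true if the add happened. */
static bool add_unless(_Atomic long long *v, long long i, long long u)
{
        long long c = atomic_load(v);

        for (;;) {
                if (c == u)
                        return false;
                /* On failure, compare_exchange reloads c with the current value. */
                if (atomic_compare_exchange_weak(v, &c, c + i))
                        return true;
        }
}

int main(void)
{
        _Atomic long long v = 1;

        printf("%d\n", add_unless(&v, 1, 0));   /* 1: v was not 0, now 2 */
        printf("%d\n", add_unless(&v, 1, 2));   /* 0: v is 2, left unchanged */
        return 0;
}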
@@ -16,7 +16,7 @@
 struct task_struct;
 struct thread_struct;
 
-#if !defined(CONFIG_LAZY_SAVE_FPU)
+#if defined(CONFIG_FPU) && !defined(CONFIG_LAZY_SAVE_FPU)
 struct fpu_state_struct;
 extern asmlinkage void fpu_save(struct fpu_state_struct *);
 #define switch_fpu(prev, next) \
@@ -408,14 +408,14 @@ static ssize_t show_valid_zones(struct device *dev,
         sprintf(buf, "%s", zone->name);
 
         /* MMOP_ONLINE_KERNEL */
-        zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL);
+        zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift);
         if (zone_shift) {
                 strcat(buf, " ");
                 strcat(buf, (zone + zone_shift)->name);
         }
 
         /* MMOP_ONLINE_MOVABLE */
-        zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE);
+        zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift);
         if (zone_shift) {
                 strcat(buf, " ");
                 strcat(buf, (zone + zone_shift)->name);
@@ -330,7 +330,7 @@ static int h_memstick_read_dev_id(struct memstick_dev *card,
         struct ms_id_register id_reg;
 
         if (!(*mrq)) {
-                memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, NULL,
+                memstick_init_req(&card->current_mrq, MS_TPC_READ_REG, &id_reg,
                                   sizeof(struct ms_id_register));
                 *mrq = &card->current_mrq;
                 return 0;
@@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap)
 
 int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to)
 {
-        int tooff = 0, fromoff = 0;
-        int size;
+        unsigned int tooff = 0, fromoff = 0;
+        size_t size;
 
         if (to->start > from->start)
                 fromoff = to->start - from->start;
         else
                 tooff = from->start - to->start;
-        size = to->len - tooff;
-        if (size > (int) (from->len - fromoff))
-                size = from->len - fromoff;
-        if (size <= 0)
+        if (fromoff >= from->len || tooff >= to->len)
+                return -EINVAL;
+
+        size = min_t(size_t, to->len - tooff, from->len - fromoff);
+        if (size == 0)
                 return -EINVAL;
         size *= sizeof(u16);
 
@@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to)
 
 int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to)
 {
-        int tooff = 0, fromoff = 0;
-        int size;
+        unsigned int tooff = 0, fromoff = 0;
+        size_t size;
 
         if (to->start > from->start)
                 fromoff = to->start - from->start;
         else
                 tooff = from->start - to->start;
-        size = to->len - tooff;
-        if (size > (int) (from->len - fromoff))
-                size = from->len - fromoff;
-        if (size <= 0)
+        if (fromoff >= from->len || tooff >= to->len)
+                return -EINVAL;
+
+        size = min_t(size_t, to->len - tooff, from->len - fromoff);
+        if (size == 0)
                 return -EINVAL;
         size *= sizeof(u16);
 
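The rewritten size computation in fb_copy_cmap() and fb_cmap_to_user() above is the substance of the bounds-checking fix: validate both offsets against their lengths before subtracting, then clamp with min_t() so the copy length can never go negative or wrap. A small standalone sketch of that check, with an invented struct cmap and copy_len() helper standing in for the fbdev types:

#include <stddef.h>
#include <stdio.h>

struct cmap {
        unsigned int start;     /* first entry covered by this map */
        unsigned int len;       /* number of entries */
};

/* Number of entries that may safely be copied, or 0 if the ranges do not overlap. */
static size_t copy_len(const struct cmap *from, const struct cmap *to)
{
        unsigned int tooff = 0, fromoff = 0;
        size_t to_room, from_room;

        if (to->start > from->start)
                fromoff = to->start - from->start;
        else
                tooff = from->start - to->start;

        /* Reject offsets that fall outside either map before subtracting. */
        if (fromoff >= from->len || tooff >= to->len)
                return 0;

        to_room = to->len - tooff;
        from_room = from->len - fromoff;
        return to_room < from_room ? to_room : from_room;
}

int main(void)
{
        struct cmap from = { .start = 0, .len = 16 };
        struct cmap to = { .start = 8, .len = 4 };

        printf("%zu\n", copy_len(&from, &to));  /* prints 4 */
        return 0;
}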
@@ -38,6 +38,7 @@ config FS_DAX
         bool "Direct Access (DAX) support"
         depends on MMU
         depends on !(ARM || MIPS || SPARC)
+        select FS_IOMAP
         help
           Direct Access (DAX) can be used on memory-backed block devices.
           If the block device supports DAX and the filesystem supports DAX,
fs/dax.c
@@ -990,7 +990,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 
-#ifdef CONFIG_FS_IOMAP
 static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
 {
         return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
@@ -1428,4 +1427,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
 #endif /* CONFIG_FS_DAX_PMD */
-#endif /* CONFIG_FS_IOMAP */
@@ -1,6 +1,5 @@
 config EXT2_FS
         tristate "Second extended fs support"
-        select FS_IOMAP if FS_DAX
         help
           Ext2 is a standard Linux file system for hard disks.
 
@@ -37,7 +37,6 @@ config EXT4_FS
         select CRC16
         select CRYPTO
         select CRYPTO_CRC32C
-        select FS_IOMAP if FS_DAX
         help
           This is the next generation of the ext3 filesystem.
 
@@ -3179,6 +3179,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
              iter.tgid += 1, iter = next_tgid(ns, iter)) {
                 char name[PROC_NUMBUF];
                 int len;
+
+                cond_resched();
                 if (!has_pid_permissions(ns, iter.task, 2))
                         continue;
 
@@ -74,6 +74,7 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/uaccess.h>
+#include <linux/major.h>
 #include "internal.h"
 
 static struct kmem_cache *romfs_inode_cachep;
@@ -416,7 +417,22 @@ static void romfs_destroy_inode(struct inode *inode)
 static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
         struct super_block *sb = dentry->d_sb;
-        u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+        u64 id = 0;
 
+        /* When calling huge_encode_dev(),
+         * use sb->s_bdev->bd_dev when,
+         *   - CONFIG_ROMFS_ON_BLOCK defined
+         * use sb->s_dev when,
+         *   - CONFIG_ROMFS_ON_BLOCK undefined and
+         *   - CONFIG_ROMFS_ON_MTD defined
+         * leave id as 0 when,
+         *   - CONFIG_ROMFS_ON_BLOCK undefined and
+         *   - CONFIG_ROMFS_ON_MTD undefined
+         */
+        if (sb->s_bdev)
+                id = huge_encode_dev(sb->s_bdev->bd_dev);
+        else if (sb->s_dev)
+                id = huge_encode_dev(sb->s_dev);
+
         buf->f_type = ROMFS_MAGIC;
         buf->f_namelen = ROMFS_MAXFN;
@@ -489,6 +505,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_flags |= MS_RDONLY | MS_NOATIME;
         sb->s_op = &romfs_super_ops;
 
+#ifdef CONFIG_ROMFS_ON_MTD
+        /* Use same dev ID from the underlying mtdblock device */
+        if (sb->s_mtd)
+                sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index);
+#endif
         /* read the image superblock and check it */
         rsb = kmalloc(512, GFP_KERNEL);
         if (!rsb)
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
         struct uffd_msg msg;
         wait_queue_t wq;
         struct userfaultfd_ctx *ctx;
+        bool waken;
 };
 
 struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
         if (len && (start > uwq->msg.arg.pagefault.address ||
                     start + len <= uwq->msg.arg.pagefault.address))
                 goto out;
+        WRITE_ONCE(uwq->waken, true);
+        /*
+         * The implicit smp_mb__before_spinlock in try_to_wake_up()
+         * renders uwq->waken visible to other CPUs before the task is
+         * waken.
+         */
         ret = wake_up_state(wq->private, mode);
         if (ret)
                 /*
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
         struct userfaultfd_wait_queue uwq;
         int ret;
         bool must_wait, return_to_userland;
+        long blocking_state;
 
         BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
         uwq.wq.private = current;
         uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
         uwq.ctx = ctx;
+        uwq.waken = false;
 
         return_to_userland =
                 (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
                 (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+        blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+                         TASK_KILLABLE;
 
         spin_lock(&ctx->fault_pending_wqh.lock);
         /*
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
          * following the spin_unlock to happen before the list_add in
          * __add_wait_queue.
          */
-        set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
-                          TASK_KILLABLE);
+        set_current_state(blocking_state);
         spin_unlock(&ctx->fault_pending_wqh.lock);
 
         must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
                 wake_up_poll(&ctx->fd_wqh, POLLIN);
                 schedule();
                 ret |= VM_FAULT_MAJOR;
+
+                /*
+                 * False wakeups can orginate even from rwsem before
+                 * up_read() however userfaults will wait either for a
+                 * targeted wakeup on the specific uwq waitqueue from
+                 * wake_userfault() or for signals or for uffd
+                 * release.
+                 */
+                while (!READ_ONCE(uwq.waken)) {
+                        /*
+                         * This needs the full smp_store_mb()
+                         * guarantee as the state write must be
+                         * visible to other CPUs before reading
+                         * uwq.waken from other CPUs.
+                         */
+                        set_current_state(blocking_state);
+                        if (READ_ONCE(uwq.waken) ||
+                            READ_ONCE(ctx->released) ||
+                            (return_to_userland ? signal_pending(current) :
+                             fatal_signal_pending(current)))
+                                break;
+                        schedule();
+                }
         }
 
         __set_current_state(TASK_RUNNING);
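The new uwq.waken flag, published with WRITE_ONCE() before the wakeup and re-checked with READ_ONCE() in the while loop above, is a "set a flag, then wake" handshake that lets the faulting task ignore spurious wakeups. A rough userspace analogue of the idiom using C11 atomics and a pthread condition variable follows; it illustrates the pattern only and is not the kernel's wait-queue machinery.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static atomic_bool waken = false;

static void *waker(void *arg)
{
        (void)arg;
        /* Publish the flag before issuing the wakeup, as the patch does
         * with WRITE_ONCE(uwq->waken, true) before wake_up_state(). */
        atomic_store(&waken, true);
        pthread_mutex_lock(&lock);
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, waker, NULL);
        pthread_mutex_lock(&lock);
        /* Re-check the flag after every wakeup; spurious wakeups simply loop. */
        while (!atomic_load(&waken))
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        printf("woken for real\n");
        return 0;
}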
@@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
                 unsigned long map_offset);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
                                           unsigned long pnum);
-extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-                          enum zone_type target);
+extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+                           enum zone_type target, int *zone_shift);
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
@@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
  * @zonelist - The zonelist to search for a suitable zone
  * @highest_zoneidx - The zone index of the highest zone to return
  * @nodes - An optional nodemask to filter the zonelist with
- * @zone - The first suitable zone found is returned via this parameter
+ * @return - Zoneref pointer for the first suitable zone found (see below)
  *
  * This function returns the first zone at or below a given zone index that is
  * within the allowed nodemask. The zoneref returned is a cursor that can be
  * used to iterate the zonelist with next_zones_zonelist by advancing it by
  * one before calling.
+ *
+ * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is
+ * never NULL). This may happen either genuinely, or due to concurrent nodemask
+ * update due to cpuset modification.
  */
 static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
                                         enum zone_type highest_zoneidx,
@@ -110,6 +110,7 @@ extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
 extern unsigned long *watchdog_cpumask_bits;
+extern atomic_t watchdog_park_in_progress;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
@@ -249,7 +249,7 @@ void panic(const char *fmt, ...)
          * Delay timeout seconds before rebooting the machine.
          * We can't use the "normal" timers since we just panicked.
          */
-        pr_emerg("Rebooting in %d seconds..", panic_timeout);
+        pr_emerg("Rebooting in %d seconds..\n", panic_timeout);
 
         for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
                 touch_nmi_watchdog();
@@ -49,6 +49,8 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 #define for_each_watchdog_cpu(cpu) \
         for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
 
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
+
 /*
  * The 'watchdog_running' variable is set to 1 when the watchdog threads
  * are registered/started and is set to 0 when the watchdog threads are
@@ -260,6 +262,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
         int duration;
         int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
 
+        if (atomic_read(&watchdog_park_in_progress) != 0)
+                return HRTIMER_NORESTART;
+
         /* kick the hardlockup detector */
         watchdog_interrupt_count();
 
@@ -467,12 +472,16 @@ static int watchdog_park_threads(void)
 {
         int cpu, ret = 0;
 
+        atomic_set(&watchdog_park_in_progress, 1);
+
         for_each_watchdog_cpu(cpu) {
                 ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
                 if (ret)
                         break;
         }
 
+        atomic_set(&watchdog_park_in_progress, 0);
+
         return ret;
 }
 
@@ -84,6 +84,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
         /* Ensure the watchdog never gets throttled */
         event->hw.interrupts = 0;
 
+        if (atomic_read(&watchdog_park_in_progress) != 0)
+                return;
+
         if (__this_cpu_read(watchdog_nmi_touch) == true) {
                 __this_cpu_write(watchdog_nmi_touch, false);
                 return;
@@ -144,4 +144,3 @@ int ioremap_page_range(unsigned long addr,
 
         return err;
 }
-EXPORT_SYMBOL_GPL(ioremap_page_range);
@@ -769,7 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
                         struct radix_tree_node *old = child;
                         offset = child->offset + 1;
                         child = child->parent;
-                        WARN_ON_ONCE(!list_empty(&node->private_list));
+                        WARN_ON_ONCE(!list_empty(&old->private_list));
                         radix_tree_node_free(old);
                         if (old == entry_to_node(node))
                                 return;
@@ -783,6 +783,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 
         assert_spin_locked(pmd_lockptr(mm, pmd));
 
+        /*
+         * When we COW a devmap PMD entry, we split it into PTEs, so we should
+         * not be in this function with `flags & FOLL_COW` set.
+         */
+        WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
+
         if (flags & FOLL_WRITE && !pmd_write(*pmd))
                 return NULL;
 
@@ -1128,6 +1134,16 @@ out_unlock:
         return ret;
 }
 
+/*
+ * FOLL_FORCE can write to even unwritable pmd's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+{
+        return pmd_write(pmd) ||
+               ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+}
+
 struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                                    unsigned long addr,
                                    pmd_t *pmd,
@@ -1138,7 +1154,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 
         assert_spin_locked(pmd_lockptr(mm, pmd));
 
-        if (flags & FOLL_WRITE && !pmd_write(*pmd))
+        if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
                 goto out;
 
         /* Avoid dumping huge zero page */
@@ -4353,9 +4353,9 @@ static int mem_cgroup_do_precharge(unsigned long count)
                 return ret;
         }
 
-        /* Try charges one by one with reclaim */
+        /* Try charges one by one with reclaim, but do not retry */
         while (count--) {
-                ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
+                ret = try_charge(mc.to, GFP_KERNEL | __GFP_NORETRY, 1);
                 if (ret)
                         return ret;
                 mc.precharge++;
@@ -1033,36 +1033,39 @@ static void node_states_set_node(int node, struct memory_notify *arg)
                 node_set_state(node, N_MEMORY);
 }
 
-int zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-                   enum zone_type target)
+bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+                    enum zone_type target, int *zone_shift)
 {
         struct zone *zone = page_zone(pfn_to_page(pfn));
         enum zone_type idx = zone_idx(zone);
         int i;
 
+        *zone_shift = 0;
+
         if (idx < target) {
                 /* pages must be at end of current zone */
                 if (pfn + nr_pages != zone_end_pfn(zone))
-                        return 0;
+                        return false;
 
                 /* no zones in use between current zone and target */
                 for (i = idx + 1; i < target; i++)
                         if (zone_is_initialized(zone - idx + i))
-                                return 0;
+                                return false;
         }
 
         if (target < idx) {
                 /* pages must be at beginning of current zone */
                 if (pfn != zone->zone_start_pfn)
-                        return 0;
+                        return false;
 
                 /* no zones in use between current zone and target */
                 for (i = target + 1; i < idx; i++)
                         if (zone_is_initialized(zone - idx + i))
-                                return 0;
+                                return false;
         }
 
-        return target - idx;
+        *zone_shift = target - idx;
+        return true;
 }
 
 /* Must be protected by mem_hotplug_begin() */
@@ -1089,10 +1092,13 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
             !can_online_high_movable(zone))
                 return -EINVAL;
 
-        if (online_type == MMOP_ONLINE_KERNEL)
-                zone_shift = zone_can_shift(pfn, nr_pages, ZONE_NORMAL);
-        else if (online_type == MMOP_ONLINE_MOVABLE)
-                zone_shift = zone_can_shift(pfn, nr_pages, ZONE_MOVABLE);
+        if (online_type == MMOP_ONLINE_KERNEL) {
+                if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift))
+                        return -EINVAL;
+        } else if (online_type == MMOP_ONLINE_MOVABLE) {
+                if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift))
+                        return -EINVAL;
+        }
 
         zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
         if (!zone)
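zone_can_shift() now reports whether a shift is possible through its bool return and hands the shift itself back through the int *zone_shift out parameter, so the online_pages() call sites above can fail with -EINVAL when no shift is possible rather than conflating that case with a valid shift of 0. A minimal sketch of that bool-plus-out-parameter convention; compute_shift() and its arguments are invented for the example.

#include <stdbool.h>
#include <stdio.h>

/*
 * Report success or failure via the return value and the computed shift via
 * an out parameter, so that a shift of 0 remains a valid, successful answer.
 */
static bool compute_shift(int from, int to, int *shift)
{
        *shift = 0;

        if (from < 0 || to < 0)
                return false;   /* invalid input */

        *shift = to - from;
        return true;
}

int main(void)
{
        int shift;

        if (!compute_shift(2, 2, &shift)) {
                fprintf(stderr, "cannot shift\n");
                return 1;
        }
        printf("shift=%d\n", shift);    /* prints shift=0: success, no move needed */
        return 0;
}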
@@ -2017,8 +2017,8 @@ retry_cpuset:
 
         nmask = policy_nodemask(gfp, pol);
         zl = policy_zonelist(gfp, pol, node);
-        mpol_cond_put(pol);
         page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+        mpol_cond_put(pol);
 out:
         if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                 goto retry_cpuset;
@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         struct page *page = NULL;
         unsigned int alloc_flags;
         unsigned long did_some_progress;
-        enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+        enum compact_priority compact_priority;
         enum compact_result compact_result;
-        int compaction_retries = 0;
-        int no_progress_loops = 0;
+        int compaction_retries;
+        int no_progress_loops;
         unsigned long alloc_start = jiffies;
         unsigned int stall_timeout = 10 * HZ;
+        unsigned int cpuset_mems_cookie;
 
         /*
          * In the slowpath, we sanity check order to avoid ever trying to
@@ -3549,6 +3550,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                 (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
                 gfp_mask &= ~__GFP_ATOMIC;
 
+retry_cpuset:
+        compaction_retries = 0;
+        no_progress_loops = 0;
+        compact_priority = DEF_COMPACT_PRIORITY;
+        cpuset_mems_cookie = read_mems_allowed_begin();
+        /*
+         * We need to recalculate the starting point for the zonelist iterator
+         * because we might have used different nodemask in the fast path, or
+         * there was a cpuset modification and we are retrying - otherwise we
+         * could end up iterating over non-eligible zones endlessly.
+         */
+        ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
+                                        ac->high_zoneidx, ac->nodemask);
+        if (!ac->preferred_zoneref->zone)
+                goto nopage;
+
+
         /*
          * The fast path uses conservative alloc_flags to succeed only until
          * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3708,6 +3726,13 @@ retry:
                                  &compaction_retries))
                 goto retry;
 
+        /*
+         * It's possible we raced with cpuset update so the OOM would be
+         * premature (see below the nopage: label for full explanation).
+         */
+        if (read_mems_allowed_retry(cpuset_mems_cookie))
+                goto retry_cpuset;
+
         /* Reclaim has failed us, start killing things */
         page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
         if (page)
@@ -3720,6 +3745,16 @@ retry:
         }
 
 nopage:
+        /*
+         * When updating a task's mems_allowed or mempolicy nodemask, it is
+         * possible to race with parallel threads in such a way that our
+         * allocation can fail while the mask is being updated. If we are about
+         * to fail, check if the cpuset changed during allocation and if so,
+         * retry.
+         */
+        if (read_mems_allowed_retry(cpuset_mems_cookie))
+                goto retry_cpuset;
+
         warn_alloc(gfp_mask,
                         "page allocation failure: order:%u", order);
 got_pg:
@@ -3734,7 +3769,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                         struct zonelist *zonelist, nodemask_t *nodemask)
 {
         struct page *page;
-        unsigned int cpuset_mems_cookie;
         unsigned int alloc_flags = ALLOC_WMARK_LOW;
         gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
         struct alloc_context ac = {
@@ -3771,9 +3805,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
         if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
                 alloc_flags |= ALLOC_CMA;
 
-retry_cpuset:
-        cpuset_mems_cookie = read_mems_allowed_begin();
-
         /* Dirty zone balancing only done in the fast path */
         ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
@@ -3784,8 +3815,13 @@ retry_cpuset:
          */
         ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
                                         ac.high_zoneidx, ac.nodemask);
-        if (!ac.preferred_zoneref) {
+        if (!ac.preferred_zoneref->zone) {
                 page = NULL;
+                /*
+                 * This might be due to race with cpuset_current_mems_allowed
+                 * update, so make sure we retry with original nodemask in the
+                 * slow path.
+                 */
                 goto no_zone;
         }
 
@@ -3794,6 +3830,7 @@ retry_cpuset:
         if (likely(page))
                 goto out;
 
+no_zone:
         /*
          * Runtime PM, block IO and its error handling path can deadlock
          * because I/O on the device might not complete.
@@ -3805,21 +3842,10 @@ retry_cpuset:
          * Restore the original nodemask if it was potentially replaced with
          * &cpuset_current_mems_allowed to optimize the fast-path attempt.
          */
-        if (cpusets_enabled())
+        if (unlikely(ac.nodemask != nodemask))
                 ac.nodemask = nodemask;
-        page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
-no_zone:
-        /*
-         * When updating a task's mems_allowed, it is possible to race with
-         * parallel threads in such a way that an allocation can fail while
-         * the mask is being updated. If a page allocation is about to fail,
-         * check if the cpuset changed during allocation and if so, retry.
-         */
-        if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-                alloc_mask = gfp_mask;
-                goto retry_cpuset;
-        }
+        page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
 out:
         if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
@@ -7248,6 +7274,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
                 .zone = page_zone(pfn_to_page(start)),
                 .mode = MIGRATE_SYNC,
                 .ignore_skip_hint = true,
+                .gfp_mask = GFP_KERNEL,
         };
         INIT_LIST_HEAD(&cc.migratepages);
 
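The cpuset_mems_cookie handling moved into the slowpath above follows a seqcount-style optimistic scheme: take a cookie with read_mems_allowed_begin(), attempt the allocation, and only when the attempt failed ask read_mems_allowed_retry() whether a concurrent cpuset/nodemask update invalidated it, jumping back to retry_cpuset: if so. A stripped-down userspace illustration of that retry idiom; the seq counter and the do_allocation() stand-in are invented, and the memory barriers of a real seqcount are omitted here.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint seq;         /* held at an odd value while an update is in flight */

static unsigned int read_begin(void)
{
        unsigned int s;

        /* Wait for any in-progress update (odd value) to finish. */
        while ((s = atomic_load(&seq)) & 1)
                ;
        return s;
}

static bool read_retry(unsigned int cookie)
{
        /* Retry if an update started or completed since read_begin(). */
        return atomic_load(&seq) != cookie;
}

static bool do_allocation(void)
{
        return true;            /* placeholder for the real work */
}

int main(void)
{
        unsigned int cookie;
        bool ok;

        do {
                cookie = read_begin();
                ok = do_allocation();
        } while (!ok && read_retry(cookie));    /* retry only if we failed and raced */

        printf("%s\n", ok ? "allocated" : "failed for real");
        return 0;
}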
mm/slub.c
@@ -496,10 +496,11 @@ static inline int check_valid_pointer(struct kmem_cache *s,
         return 1;
 }
 
-static void print_section(char *text, u8 *addr, unsigned int length)
+static void print_section(char *level, char *text, u8 *addr,
+                          unsigned int length)
 {
         metadata_access_enable();
-        print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
+        print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
                         length, 1);
         metadata_access_disable();
 }
@@ -636,14 +637,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
                p, p - addr, get_freepointer(s, p));
 
         if (s->flags & SLAB_RED_ZONE)
-                print_section("Redzone ", p - s->red_left_pad, s->red_left_pad);
+                print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
+                              s->red_left_pad);
         else if (p > addr + 16)
-                print_section("Bytes b4 ", p - 16, 16);
+                print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
 
-        print_section("Object ", p, min_t(unsigned long, s->object_size,
-                                PAGE_SIZE));
+        print_section(KERN_ERR, "Object ", p,
+                      min_t(unsigned long, s->object_size, PAGE_SIZE));
         if (s->flags & SLAB_RED_ZONE)
-                print_section("Redzone ", p + s->object_size,
+                print_section(KERN_ERR, "Redzone ", p + s->object_size,
                         s->inuse - s->object_size);
 
         if (s->offset)
@@ -658,7 +660,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 
         if (off != size_from_object(s))
                 /* Beginning of the filler is the free pointer */
-                print_section("Padding ", p + off, size_from_object(s) - off);
+                print_section(KERN_ERR, "Padding ", p + off,
+                              size_from_object(s) - off);
 
         dump_stack();
 }
@@ -820,7 +823,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
                 end--;
 
         slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
-        print_section("Padding ", end - remainder, remainder);
+        print_section(KERN_ERR, "Padding ", end - remainder, remainder);
 
         restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
         return 0;
@@ -973,7 +976,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
                         page->freelist);
 
         if (!alloc)
-                print_section("Object ", (void *)object,
+                print_section(KERN_INFO, "Object ", (void *)object,
                                 s->object_size);
 
         dump_stack();
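print_section() now takes the printk level as its first argument and forwards it to print_hex_dump(), so the routine object dump in trace() can go out at KERN_INFO while the corruption reports above stay at KERN_ERR. A tiny userspace analogue of threading a level prefix through a dump helper; hex_dump() and the LVL_* macros are invented for the illustration.

#include <stdio.h>

#define LVL_ERR  "<3>"
#define LVL_INFO "<6>"

/* Dump @len bytes of @buf, 16 per row, each row tagged with @level and @prefix. */
static void hex_dump(const char *level, const char *prefix,
                     const unsigned char *buf, size_t len)
{
        for (size_t i = 0; i < len; i++) {
                if (i % 16 == 0)
                        printf("%s%s%s%04zx: ", i ? "\n" : "", level, prefix, i);
                printf("%02x ", buf[i]);
        }
        printf("\n");
}

int main(void)
{
        unsigned char obj[20] = { 0xde, 0xad, 0xbe, 0xef };

        hex_dump(LVL_INFO, "Object ", obj, sizeof(obj));        /* routine trace */
        hex_dump(LVL_ERR, "Redzone ", obj, sizeof(obj));        /* corruption report */
        return 0;
}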