Merge branch 'akpm' (more patches from Andrew)
Merge patches from Andrew Morton:
 "Most of the rest of MM, plus a few dribs and drabs.

  I still have quite a few irritating patches left around: ones with
  dubious testing results, lack of review, ones which should have gone
  via maintainer trees but the maintainers are slack, etc.  I need to be
  more activist in getting these things wrapped up outside the merge
  window, but they're such a PITA."

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (48 commits)
  mm/vmscan.c: avoid possible deadlock caused by too_many_isolated()
  vmscan: comment too_many_isolated()
  mm/kmemleak.c: remove obsolete simple_strtoul
  mm/memory_hotplug.c: improve comments
  mm/hugetlb: create hugetlb cgroup file in hugetlb_init
  mm/mprotect.c: coding-style cleanups
  Documentation: ABI: /sys/devices/system/node/
  slub: drop mutex before deleting sysfs entry
  memcg: add comments clarifying aspects of cache attribute propagation
  kmem: add slab-specific documentation about the kmem controller
  slub: slub-specific propagation changes
  slab: propagate tunable values
  memcg: aggregate memcg cache values in slabinfo
  memcg/sl[au]b: shrink dead caches
  memcg/sl[au]b: track all the memcg children of a kmem_cache
  memcg: destroy memcg caches
  sl[au]b: allocate objects from memcg cache
  sl[au]b: always get the cache from its page in kmem_cache_free()
  memcg: skip memcg kmem allocations in specified code regions
  memcg: infrastructure to match an allocation to the right cache
  ...
commit 673ab8783b
@@ -1,7 +1,101 @@
What:		/sys/devices/system/node/possible
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that could possibly become online at some point.

What:		/sys/devices/system/node/online
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that are online.

What:		/sys/devices/system/node/has_normal_memory
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that have regular memory.

What:		/sys/devices/system/node/has_cpu
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that have one or more CPUs.

What:		/sys/devices/system/node/has_high_memory
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Nodes that have regular or high memory.
		Depends on CONFIG_HIGHMEM.

What:		/sys/devices/system/node/nodeX
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		When CONFIG_NUMA is enabled, this is a directory containing
		information on node X such as what CPUs are local to the
		node.
		node. Each file is detailed next.

What:		/sys/devices/system/node/nodeX/cpumap
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		The node's cpumap.

What:		/sys/devices/system/node/nodeX/cpulist
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		The CPUs associated with the node.

What:		/sys/devices/system/node/nodeX/meminfo
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Provides information about the node's distribution and memory
		utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.txt

What:		/sys/devices/system/node/nodeX/numastat
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		The node's hit/miss statistics, in units of pages.
		See Documentation/numastat.txt

What:		/sys/devices/system/node/nodeX/distance
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		Distance between the node and all the other nodes
		in the system.

What:		/sys/devices/system/node/nodeX/vmstat
Date:		October 2002
Contact:	Linux Memory Management list <linux-mm@kvack.org>
Description:
		The node's zoned virtual memory statistics.
		This is a superset of numastat.

What:		/sys/devices/system/node/nodeX/compact
Date:		February 2010
Contact:	Mel Gorman <mel@csn.ul.ie>
Description:
		When this file is written to, all memory within that node
		will be compacted. When it completes, memory will be freed
		into blocks which have as many contiguous pages as possible.

What:		/sys/devices/system/node/nodeX/scan_unevictable_pages
Date:		October 2008
Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
Description:
		When set, it triggers scanning the node's unevictable lists
		and moves any pages that have become evictable onto the
		respective zone's inactive list. See mm/vmscan.c

What:		/sys/devices/system/node/nodeX/hugepages/hugepages-<size>/
Date:		December 2009
Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
Description:
		The node's huge page size control/query attributes.
		See Documentation/vm/hugetlbpage.txt
@@ -71,6 +71,11 @@ Brief summary of control files.
 memory.oom_control		 # set/show oom controls.
 memory.numa_stat		 # show the number of memory usage per numa node

 memory.kmem.limit_in_bytes	 # set/show hard limit for kernel memory
 memory.kmem.usage_in_bytes	 # show current kernel memory allocation
 memory.kmem.failcnt		 # show the number of kernel memory usage hits limits
 memory.kmem.max_usage_in_bytes	 # show max kernel memory usage recorded

 memory.kmem.tcp.limit_in_bytes	 # set/show hard limit for tcp buf memory
 memory.kmem.tcp.usage_in_bytes	 # show current tcp buf memory allocation
 memory.kmem.tcp.failcnt		 # show the number of tcp buf memory usage hits limits

@@ -268,20 +273,73 @@ the amount of kernel memory used by the system. Kernel memory is fundamentally
different than user memory, since it can't be swapped out, which makes it
possible to DoS the system by consuming too much of this precious resource.

Kernel memory won't be accounted at all until a limit on a group is set. This
allows for existing setups to continue working without disruption. The limit
cannot be set if the cgroup has children, or if there are already tasks in the
cgroup. Attempting to set the limit under those conditions will return -EBUSY.
When use_hierarchy == 1 and a group is accounted, its children will
automatically be accounted regardless of their limit value.

After a group is first limited, it will be kept being accounted until it
is removed. The memory limitation itself can of course be removed by writing
-1 to memory.kmem.limit_in_bytes. In this case, kmem will be accounted, but not
limited.

Kernel memory limits are not imposed for the root cgroup. Usage for the root
cgroup may or may not be accounted.
cgroup may or may not be accounted. The memory used is accumulated into
memory.kmem.usage_in_bytes, or in a separate counter when it makes sense
(currently only for tcp).
The main "kmem" counter is fed into the main counter, so kmem charges will
also be visible from the user counter.

Currently no soft limit is implemented for kernel memory. It is future work
to trigger slab reclaim when those limits are reached.

2.7.1 Current Kernel Memory resources accounted

* stack pages: every process consumes some stack pages. By accounting them
into kernel memory, we prevent new processes from being created when the
kernel memory usage is too high.

* slab pages: pages allocated by the SLAB or SLUB allocator are tracked. A copy
of each kmem_cache is created every time the cache is touched for the first
time from inside the memcg. The creation is done lazily, so some objects can
still be skipped while the cache is being created. All objects in a slab page
should belong to the same memcg. This only fails to hold when a task is
migrated to a different memcg during the page allocation by the cache.

* sockets memory pressure: some socket protocols have memory pressure
thresholds. The Memory Controller allows them to be controlled individually
per cgroup, instead of globally.

* tcp memory pressure: sockets memory pressure for the tcp protocol.

2.7.3 Common use cases

Because the "kmem" counter is fed to the main user counter, kernel memory can
never be limited completely independently of user memory. Say "U" is the user
limit, and "K" the kernel limit. There are three possible ways limits can be
set:

U != 0, K = unlimited:
This is the standard memcg limitation mechanism already present before kmem
accounting. Kernel memory is completely ignored.

U != 0, K < U:
Kernel memory is a subset of the user memory. This setup is useful in
deployments where the total amount of memory per-cgroup is overcommitted.
Overcommitting kernel memory limits is definitely not recommended, since the
box can still run out of non-reclaimable memory.
In this case, the admin could set up K so that the sum of all groups is
never greater than the total memory, and freely set U at the cost of his
QoS.

U != 0, K >= U:
Since kmem charges will also be fed to the user counter, reclaim will be
triggered for the cgroup for both kinds of memory. This setup gives the
admin a unified view of memory, and it is also useful for people who just
want to track kernel memory usage.

3. User Interface

0. Configuration

@@ -290,6 +348,7 @@ a. Enable CONFIG_CGROUPS
b. Enable CONFIG_RESOURCE_COUNTERS
c. Enable CONFIG_MEMCG
d. Enable CONFIG_MEMCG_SWAP (to use swap extension)
e. Enable CONFIG_MEMCG_KMEM (to use kmem extension)

1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
# mount -t tmpfs none /sys/fs/cgroup

@@ -406,6 +465,11 @@ About use_hierarchy, see Section 6.
Because rmdir() moves all pages to parent, some out-of-use page caches can be
moved to the parent. If you want to avoid that, force_empty will be useful.

Also, note that when memory.kmem.limit_in_bytes is set, the charges due to
kernel pages will still be seen. This is not considered a failure and the
write will still return success. In this case, it is expected that
memory.kmem.usage_in_bytes == memory.usage_in_bytes.

About use_hierarchy, see Section 6.

5.2 stat file
@@ -83,16 +83,17 @@ to work with it.
	res_counter->lock internally (it must be called with res_counter->lock
	held). The force parameter indicates whether we can bypass the limit.

 e. void res_counter_uncharge[_locked]
 e. u64 res_counter_uncharge[_locked]
			(struct res_counter *rc, unsigned long val)

	When a resource is released (freed) it should be de-accounted
	from the resource counter it was accounted to. This is called
	"uncharging".
	"uncharging". The return value of this function indicates the amount
	of charges still present in the counter.

	The _locked routines imply that the res_counter->lock is taken.

 f. void res_counter_uncharge_until
 f. u64 res_counter_uncharge_until
		(struct res_counter *rc, struct res_counter *top,
		 unsigned long val)
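With the u64 return type introduced here, a caller can observe the residual charge from the same uncharge call instead of re-reading the counter under its lock. A minimal sketch of such a caller, assuming only the interface documented above (the helper name and the debug message are illustrative, not part of this patch):

#include <linux/res_counter.h>
#include <linux/printk.h>

/* Illustrative sketch: release nr_bytes and act on what is left behind. */
static void my_release_charge(struct res_counter *rc, unsigned long nr_bytes)
{
	u64 remaining;

	/* Returns rc->usage as it stands after this uncharge. */
	remaining = res_counter_uncharge(rc, nr_bytes);

	if (remaining == 0)
		pr_debug("counter fully drained; owner may be torn down\n");
}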
@@ -133,12 +133,39 @@ static inline void writel(unsigned int b, volatile void __iomem *addr)
#define insb(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,1,count) : 0)
#define insw(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,2,count) : 0)
#define insl(port,addr,count) (cris_iops ? cris_iops->read_io(port,addr,4,count) : 0)
#define outb(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,1,1)
#define outw(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,2,1)
#define outl(data,port) if (cris_iops) cris_iops->write_io(port,(void*)(unsigned)data,4,1)
#define outsb(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,1,count)
#define outsw(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,2,count)
#define outsl(port,addr,count) if(cris_iops) cris_iops->write_io(port,(void*)addr,3,count)
static inline void outb(unsigned char data, unsigned int port)
{
	if (cris_iops)
		cris_iops->write_io(port, (void *) &data, 1, 1);
}
static inline void outw(unsigned short data, unsigned int port)
{
	if (cris_iops)
		cris_iops->write_io(port, (void *) &data, 2, 1);
}
static inline void outl(unsigned int data, unsigned int port)
{
	if (cris_iops)
		cris_iops->write_io(port, (void *) &data, 4, 1);
}
static inline void outsb(unsigned int port, const void *addr,
			 unsigned long count)
{
	if (cris_iops)
		cris_iops->write_io(port, (void *)addr, 1, count);
}
static inline void outsw(unsigned int port, const void *addr,
			 unsigned long count)
{
	if (cris_iops)
		cris_iops->write_io(port, (void *)addr, 2, count);
}
static inline void outsl(unsigned int port, const void *addr,
			 unsigned long count)
{
	if (cris_iops)
		cris_iops->write_io(port, (void *)addr, 4, count);
}

/*
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
@@ -3,6 +3,7 @@ config H8300
	default y
	select HAVE_IDE
	select HAVE_GENERIC_HARDIRQS
	select GENERIC_ATOMIC64
	select HAVE_UID16
	select ARCH_WANT_IPC_PARSE_VERSION
	select GENERIC_IRQ_SHOW
@@ -23,6 +23,7 @@

#include <linux/moduleparam.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/delay.h>

@@ -62,29 +63,75 @@ static void iris_power_off(void)
 * by reading its input port and seeing whether the read value is
 * meaningful.
 */
static int iris_init(void)
static int iris_probe(struct platform_device *pdev)
{
	unsigned char status;
	if (force != 1) {
		printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
		return -ENODEV;
	}
	status = inb(IRIS_GIO_INPUT);
	unsigned char status = inb(IRIS_GIO_INPUT);
	if (status == IRIS_GIO_NODEV) {
		printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
		printk(KERN_ERR "This machine does not seem to be an Iris. "
			"Power off handler not installed.\n");
		return -ENODEV;
	}
	old_pm_power_off = pm_power_off;
	pm_power_off = &iris_power_off;
	printk(KERN_INFO "Iris power_off handler installed.\n");
	return 0;
}

static int iris_remove(struct platform_device *pdev)
{
	pm_power_off = old_pm_power_off;
	printk(KERN_INFO "Iris power_off handler uninstalled.\n");
	return 0;
}

static struct platform_driver iris_driver = {
	.driver		= {
		.name	= "iris",
		.owner	= THIS_MODULE,
	},
	.probe		= iris_probe,
	.remove		= iris_remove,
};

static struct resource iris_resources[] = {
	{
		.start	= IRIS_GIO_BASE,
		.end	= IRIS_GIO_OUTPUT,
		.flags	= IORESOURCE_IO,
		.name	= "address"
	}
};

static struct platform_device *iris_device;

static int iris_init(void)
{
	int ret;
	if (force != 1) {
		printk(KERN_ERR "The force parameter has not been set to 1."
			" The Iris poweroff handler will not be installed.\n");
		return -ENODEV;
	}
	ret = platform_driver_register(&iris_driver);
	if (ret < 0) {
		printk(KERN_ERR "Failed to register iris platform driver: %d\n",
			ret);
		return ret;
	}
	iris_device = platform_device_register_simple("iris", (-1),
				iris_resources, ARRAY_SIZE(iris_resources));
	if (IS_ERR(iris_device)) {
		printk(KERN_ERR "Failed to register iris platform device\n");
		platform_driver_unregister(&iris_driver);
		return PTR_ERR(iris_device);
	}
	return 0;
}

static void iris_exit(void)
{
	pm_power_off = old_pm_power_off;
	printk(KERN_INFO "Iris power_off handler uninstalled.\n");
	platform_device_unregister(iris_device);
	platform_driver_unregister(&iris_driver);
}

module_init(iris_init);
@@ -792,6 +792,7 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
			 * than an unsolicited DID_ABORT.
			 */
			sc->result = DID_RESET << 16;
			break;

		case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:		/* 0x004C */
			if (ioc->bus_type == FC)
@@ -107,7 +107,6 @@ void locomolcd_power(int on)
}
EXPORT_SYMBOL(locomolcd_power);


static int current_intensity;

static int locomolcd_set_intensity(struct backlight_device *bd)

@@ -122,13 +121,25 @@ static int locomolcd_set_intensity(struct backlight_device *bd)
		intensity = 0;

	switch (intensity) {
	/* AC and non-AC are handled differently, but produce same results in sharp code? */
	case 0: locomo_frontlight_set(locomolcd_dev, 0, 0, 161); break;
	case 1: locomo_frontlight_set(locomolcd_dev, 117, 0, 161); break;
	case 2: locomo_frontlight_set(locomolcd_dev, 163, 0, 148); break;
	case 3: locomo_frontlight_set(locomolcd_dev, 194, 0, 161); break;
	case 4: locomo_frontlight_set(locomolcd_dev, 194, 1, 161); break;

	/*
	 * AC and non-AC are handled differently,
	 * but produce same results in sharp code?
	 */
	case 0:
		locomo_frontlight_set(locomolcd_dev, 0, 0, 161);
		break;
	case 1:
		locomo_frontlight_set(locomolcd_dev, 117, 0, 161);
		break;
	case 2:
		locomo_frontlight_set(locomolcd_dev, 163, 0, 148);
		break;
	case 3:
		locomo_frontlight_set(locomolcd_dev, 194, 0, 161);
		break;
	case 4:
		locomo_frontlight_set(locomolcd_dev, 194, 1, 161);
		break;
	default:
		return -ENODEV;
	}

@@ -175,9 +186,11 @@ static int locomolcd_probe(struct locomo_dev *ldev)

	locomo_gpio_set_dir(ldev->dev.parent, LOCOMO_GPIO_FL_VR, 0);

	/* the poodle_lcd_power function is called for the first time
	/*
	 * the poodle_lcd_power function is called for the first time
	 * from fs_initcall, which is before locomo is activated.
	 * We need to recall poodle_lcd_power here*/
	 * We need to recall poodle_lcd_power here
	 */
	if (machine_is_poodle())
		locomolcd_power(1);

@@ -190,8 +203,8 @@ static int locomolcd_probe(struct locomo_dev *ldev)
			&ldev->dev, NULL,
			&locomobl_data, &props);

	if (IS_ERR (locomolcd_bl_device))
		return PTR_ERR (locomolcd_bl_device);
	if (IS_ERR(locomolcd_bl_device))
		return PTR_ERR(locomolcd_bl_device);

	/* Set up frontlight so that screen is readable */
	locomolcd_bl_device->props.brightness = 2;

@@ -226,7 +239,6 @@ static struct locomo_driver poodle_lcd_driver = {
	.resume = locomolcd_resume,
};


static int __init locomolcd_init(void)
{
	return locomo_driver_register(&poodle_lcd_driver);
@@ -56,13 +56,15 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
	struct ceph_nfs_confh *cfh = (void *)rawfh;
	int connected_handle_length = sizeof(*cfh)/4;
	int handle_length = sizeof(*fh)/4;
	struct dentry *dentry = d_find_alias(inode);
	struct dentry *dentry;
	struct dentry *parent;

	/* don't re-export snaps */
	if (ceph_snap(inode) != CEPH_NOSNAP)
		return -EINVAL;

	dentry = d_find_alias(inode);

	/* if we found an alias, generate a connectable fh */
	if (*max_len >= connected_handle_length && dentry) {
		dout("encode_fh %p connectable\n", dentry);
@@ -30,6 +30,7 @@ struct vm_area_struct;
#define ___GFP_HARDWALL		0x20000u
#define ___GFP_THISNODE		0x40000u
#define ___GFP_RECLAIMABLE	0x80000u
#define ___GFP_KMEMCG		0x100000u
#define ___GFP_NOTRACK		0x200000u
#define ___GFP_NO_KSWAPD	0x400000u
#define ___GFP_OTHER_NODE	0x800000u

@@ -89,6 +90,7 @@ struct vm_area_struct;

#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */

/*

@@ -365,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);

extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);

#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
@@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
					 struct page *page);
extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg);
extern int hugetlb_cgroup_file_init(int idx) __init;
extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
				   struct page *newhpage);

@@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
	return;
}

static inline int __init hugetlb_cgroup_file_init(int idx)
static inline void hugetlb_cgroup_file_init(void)
{
	return 0;
}

static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
@@ -21,11 +21,14 @@
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>

struct mem_cgroup;
struct page_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;

/* Stats that can be updated by kernel. */
enum mem_cgroup_page_stat_item {

@@ -414,5 +417,211 @@ static inline void sock_release_memcg(struct sock *sk)
{
}
#endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
extern struct static_key memcg_kmem_enabled_key;

extern int memcg_limited_groups_array_size;

/*
 * Helper macro to loop through all memcg-specific caches. Callers must still
 * check if the cache is valid (it is either valid or NULL).
 * The slab_mutex must be held when looping through those caches.
 */
#define for_each_memcg_cache_index(_idx)	\
	for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++)

static inline bool memcg_kmem_enabled(void)
{
	return static_key_false(&memcg_kmem_enabled_key);
}

/*
 * In general, we'll do everything in our power to not incur in any overhead
 * for non-memcg users for the kmem functions. Not even a function call, if we
 * can avoid it.
 *
 * Therefore, we'll inline all those functions so that in the best case, we'll
 * see that kmemcg is off for everybody and proceed quickly. If it is on,
 * we'll still do most of the flag checking inline. We check a lot of
 * conditions, but because they are pretty simple, they are expected to be
 * fast.
 */
bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
					int order);
void __memcg_kmem_commit_charge(struct page *page,
				struct mem_cgroup *memcg, int order);
void __memcg_kmem_uncharge_pages(struct page *page, int order);

int memcg_cache_id(struct mem_cgroup *memcg);
int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
			 struct kmem_cache *root_cache);
void memcg_release_cache(struct kmem_cache *cachep);
void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);

int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
void memcg_update_array_size(int num_groups);

struct kmem_cache *
__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);

void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
void kmem_cache_destroy_memcg_children(struct kmem_cache *s);

/**
 * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
 * @gfp: the gfp allocation flags.
 * @memcg: a pointer to the memcg this was charged against.
 * @order: allocation order.
 *
 * returns true if the memcg where the current task belongs can hold this
 * allocation.
 *
 * We return true automatically if this allocation is not to be accounted to
 * any memcg.
 */
static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
	if (!memcg_kmem_enabled())
		return true;

	/*
	 * __GFP_NOFAIL allocations will move on even if charging is not
	 * possible. Therefore we don't even try, and have this allocation
	 * unaccounted. We could in theory charge it with
	 * res_counter_charge_nofail, but we hope those allocations are rare,
	 * and won't be worth the trouble.
	 */
	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
		return true;
	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
		return true;

	/* If the test is dying, just let it go. */
	if (unlikely(fatal_signal_pending(current)))
		return true;

	return __memcg_kmem_newpage_charge(gfp, memcg, order);
}

/**
 * memcg_kmem_uncharge_pages: uncharge pages from memcg
 * @page: pointer to struct page being freed
 * @order: allocation order.
 *
 * there is no need to specify memcg here, since it is embedded in page_cgroup
 */
static inline void
memcg_kmem_uncharge_pages(struct page *page, int order)
{
	if (memcg_kmem_enabled())
		__memcg_kmem_uncharge_pages(page, order);
}

/**
 * memcg_kmem_commit_charge: embeds correct memcg in a page
 * @page: pointer to struct page recently allocated
 * @memcg: the memcg structure we charged against
 * @order: allocation order.
 *
 * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
 * failure of the allocation. if @page is NULL, this function will revert the
 * charges. Otherwise, it will commit the memcg given by @memcg to the
 * corresponding page_cgroup.
 */
static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
	if (memcg_kmem_enabled() && memcg)
		__memcg_kmem_commit_charge(page, memcg, order);
}

/**
 * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
 * @cachep: the original global kmem cache
 * @gfp: allocation flags.
 *
 * This function assumes that the task allocating, which determines the memcg
 * in the page allocator, belongs to the same cgroup throughout the whole
 * process. Misaccounting can happen if the task calls memcg_kmem_get_cache()
 * while belonging to a cgroup, and later on changes. This is considered
 * acceptable, and should only happen upon task migration.
 *
 * Before the cache is created by the memcg core, there is also a possible
 * imbalance: the task belongs to a memcg, but the cache being allocated from
 * is the global cache, since the child cache is not yet guaranteed to be
 * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
 * passed and the page allocator will not attempt any cgroup accounting.
 */
static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	if (!memcg_kmem_enabled())
		return cachep;
	if (gfp & __GFP_NOFAIL)
		return cachep;
	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
		return cachep;
	if (unlikely(fatal_signal_pending(current)))
		return cachep;

	return __memcg_kmem_get_cache(cachep, gfp);
}
#else
#define for_each_memcg_cache_index(_idx)	\
	for (; NULL; )

static inline bool memcg_kmem_enabled(void)
{
	return false;
}

static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
	return true;
}

static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
{
}

static inline void
memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
{
}

static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
	return -1;
}

static inline int
memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
		     struct kmem_cache *root_cache)
{
	return 0;
}

static inline void memcg_release_cache(struct kmem_cache *cachep)
{
}

static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
					struct kmem_cache *s)
{
}

static inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	return cachep;
}

static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
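Taken together, the helpers above define a three-step protocol for accounted page allocations: ask permission (newpage_charge), allocate, commit the charge to the page, and later uncharge when the page is freed. A condensed, hedged sketch of a caller, modelled on the __alloc_pages_nodemask() hunk further down (the wrapper names are assumptions used only for illustration):

#include <linux/gfp.h>
#include <linux/memcontrol.h>

/* Illustrative wrapper only; the in-tree caller is the page allocator. */
static struct page *accounted_alloc_pages(gfp_t gfp, unsigned int order)
{
	struct mem_cgroup *memcg = NULL;
	struct page *page;

	/* No-op unless __GFP_KMEMCG is set and kmemcg is enabled. */
	if (!memcg_kmem_newpage_charge(gfp, &memcg, order))
		return NULL;			/* over the kmem limit */

	page = alloc_pages(gfp, order);

	/* Commits the charge to the page's page_cgroup, or reverts it if page is NULL. */
	memcg_kmem_commit_charge(page, memcg, order);
	return page;
}

static void accounted_free_pages(struct page *page, unsigned int order)
{
	memcg_kmem_uncharge_pages(page, order);	/* memcg is read back from page_cgroup */
	__free_pages(page, order);
}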
@@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter,
 *
 * these calls check for usage underflow and show a warning on the console
 * _locked call expects the counter->lock to be taken
 *
 * returns the total charges still present in @counter.
 */

void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
void res_counter_uncharge(struct res_counter *counter, unsigned long val);
u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);

void res_counter_uncharge_until(struct res_counter *counter,
				struct res_counter *top,
				unsigned long val);
u64 res_counter_uncharge_until(struct res_counter *counter,
			       struct res_counter *top,
			       unsigned long val);
/**
 * res_counter_margin - calculate chargeable space of a counter
 * @cnt: the counter
@@ -1597,6 +1597,7 @@ struct task_struct {
		unsigned long nr_pages;	/* uncharged usage */
		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
	} memcg_batch;
	unsigned int memcg_kmem_skip_account;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
	atomic_t ptrace_bp_refcnt;
@@ -11,6 +11,8 @@

#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/workqueue.h>


/*
 * Flags to pass to kmem_cache_create().

@@ -116,6 +118,7 @@ struct kmem_cache {
};
#endif

struct mem_cgroup;
/*
 * struct kmem_cache related prototypes
 */

@@ -125,6 +128,9 @@ int slab_is_available(void);
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
			unsigned long,
			void (*)(void *));
struct kmem_cache *
kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
			unsigned long, void (*)(void *), struct kmem_cache *);
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
void kmem_cache_free(struct kmem_cache *, void *);

@@ -175,6 +181,48 @@ void kmem_cache_free(struct kmem_cache *, void *);
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
/*
 * This is the main placeholder for memcg-related information in kmem caches.
 * struct kmem_cache will hold a pointer to it, so the memory cost while
 * disabled is 1 pointer. The runtime cost while enabled gets bigger than it
 * would otherwise be if it were bundled in kmem_cache: we'll need an
 * extra pointer chase. But the trade-off clearly lies in favor of not
 * penalizing non-users.
 *
 * Both the root cache and the child caches will have it. For the root cache,
 * this will hold a dynamically allocated array large enough to hold
 * information about the currently limited memcgs in the system.
 *
 * Child caches will hold extra metadata needed for their operation. Fields are:
 *
 * @memcg: pointer to the memcg this cache belongs to
 * @list: list_head for the list of all caches in this memcg
 * @root_cache: pointer to the global, root cache, this cache was derived from
 * @dead: set to true after the memcg dies; the cache may still be around.
 * @nr_pages: number of pages that belong to this cache.
 * @destroy: worker to be called whenever we are ready, or believe we may be
 *           ready, to destroy this cache.
 */
struct memcg_cache_params {
	bool is_root_cache;
	union {
		struct kmem_cache *memcg_caches[0];
		struct {
			struct mem_cgroup *memcg;
			struct list_head list;
			struct kmem_cache *root_cache;
			bool dead;
			atomic_t nr_pages;
			struct work_struct destroy;
		};
	};
};

int memcg_update_all_caches(int num_memcgs);

struct seq_file;
int cache_show(struct kmem_cache *s, struct seq_file *m);
void print_slabinfo_header(struct seq_file *m);

/*
 * Common kmalloc functions provided by all allocators
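The union above is the central trick: a root cache carries a flexible array of per-memcg child caches indexed by memcg_cache_id(), while a child cache reuses the same storage for its back-pointers. A hedged sketch of what a lookup on the root-cache side amounts to (the helper is illustrative; the real lookup and its locking live in mm/memcontrol.c, whose diff is suppressed below):

#include <linux/memcontrol.h>
#include <linux/slab.h>

/* Illustrative only: index a root cache's memcg_caches[] array. Assumes the
 * caller follows the slab_mutex/RCU rules enforced by the memcg slab code. */
static struct kmem_cache *memcg_child_cache(struct kmem_cache *root,
					    struct mem_cgroup *memcg)
{
	int id = memcg_cache_id(memcg);		/* -1 when memcg is not limited */

	if (id < 0)
		return root;			/* unaccounted: use the global cache */

	/* May still be NULL while the per-memcg cache is created lazily. */
	return root->memcg_params->memcg_caches[id];
}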
@@ -81,6 +81,9 @@ struct kmem_cache {
	 */
	int obj_offset;
#endif /* CONFIG_DEBUG_SLAB */
#ifdef CONFIG_MEMCG_KMEM
	struct memcg_cache_params *memcg_params;
#endif

/* 6) per-cpu/per-node data, touched during every alloc/free */
	/*
@@ -101,6 +101,10 @@ struct kmem_cache {
#ifdef CONFIG_SYSFS
	struct kobject kobj;	/* For sysfs */
#endif
#ifdef CONFIG_MEMCG_KMEM
	struct memcg_cache_params *memcg_params;
	int max_attr_size; /* for propagation, maximum size of a stored attr */
#endif

#ifdef CONFIG_NUMA
	/*

@@ -222,7 +226,10 @@ void *__kmalloc(size_t size, gfp_t flags);
static __always_inline void *
kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
	void *ret;

	flags |= (__GFP_COMP | __GFP_KMEMCG);
	ret = (void *) __get_free_pages(flags, order);
	kmemleak_alloc(ret, size, 1, flags);
	return ret;
}
@@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
#endif

#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)

/*
 * flag set/clear/test wrappers
 * - pass TIF_xxxx constants to these functions
@@ -34,6 +34,7 @@
	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
	{(unsigned long)__GFP_KMEMCG,		"GFP_KMEMCG"},		\
	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
@@ -882,7 +882,7 @@ config MEMCG_SWAP_ENABLED
config MEMCG_KMEM
	bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
	depends on MEMCG && EXPERIMENTAL
	default n
	depends on SLUB || SLAB
	help
	  The Kernel Memory extension for Memory Resource Controller can limit
	  the amount of memory used by kernel objects in the system. Those are
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
						  int node)
{
	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
					     THREAD_SIZE_ORDER);

	return page ? page_address(page) : NULL;

@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,

static inline void free_thread_info(struct thread_info *ti)
{
	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
@@ -818,7 +818,7 @@ static void irq_thread_dtor(struct callback_head *unused)
	action = kthread_data(tsk);

	pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
	       tsk->comm, tsk->pid, action->irq);


	desc = irq_to_desc(action->irq);
@@ -86,33 +86,39 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
	return __res_counter_charge(counter, val, limit_fail_at, true);
}

void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
{
	if (WARN_ON(counter->usage < val))
		val = counter->usage;

	counter->usage -= val;
	return counter->usage;
}

void res_counter_uncharge_until(struct res_counter *counter,
				struct res_counter *top,
				unsigned long val)
u64 res_counter_uncharge_until(struct res_counter *counter,
			       struct res_counter *top,
			       unsigned long val)
{
	unsigned long flags;
	struct res_counter *c;
	u64 ret = 0;

	local_irq_save(flags);
	for (c = counter; c != top; c = c->parent) {
		u64 r;
		spin_lock(&c->lock);
		res_counter_uncharge_locked(c, val);
		r = res_counter_uncharge_locked(c, val);
		if (c == counter)
			ret = r;
		spin_unlock(&c->lock);
	}
	local_irq_restore(flags);
	return ret;
}

void res_counter_uncharge(struct res_counter *counter, unsigned long val)
u64 res_counter_uncharge(struct res_counter *counter, unsigned long val)
{
	res_counter_uncharge_until(counter, NULL, val);
	return res_counter_uncharge_until(counter, NULL, val);
}

static inline unsigned long long *
mm/Kconfig
@@ -149,7 +149,18 @@ config MOVABLE_NODE
	depends on NO_BOOTMEM
	depends on X86_64
	depends on NUMA
	depends on BROKEN
	default n
	help
	  Allow a node to have only movable memory. Pages used by the kernel,
	  such as direct mapping pages, cannot be migrated. So the corresponding
	  memory device cannot be hotplugged. This option allows users to
	  online all the memory of a node as movable memory so that the whole
	  node can be hotplugged. Users who don't use the memory hotplug
	  feature are fine with this option on since they don't online memory
	  as movable.

	  Say Y here if you want to hotplug a whole node.
	  Say N here if you want the kernel to use memory on all nodes evenly.

# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
mm/hugetlb.c
@@ -1906,14 +1906,12 @@ static int __init hugetlb_init(void)
		default_hstate.max_huge_pages = default_hstate_max_huge_pages;

	hugetlb_init_hstates();

	gather_bootmem_prealloc();

	report_hugepages();

	hugetlb_sysfs_init();

	hugetlb_register_all_nodes();
	hugetlb_cgroup_file_init();

	return 0;
}

@@ -1943,13 +1941,6 @@ void __init hugetlb_add_hstate(unsigned order)
	h->next_nid_to_free = first_node(node_states[N_MEMORY]);
	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
					huge_page_size(h)/1024);
	/*
	 * Add cgroup control files only if the huge page consists
	 * of more than two normal pages. This is because we use
	 * page[2].lru.next for storing cgroup details.
	 */
	if (order >= HUGETLB_CGROUP_MIN_ORDER)
		hugetlb_cgroup_file_init(hugetlb_max_hstate - 1);

	parsed_hstate = h;
}
@@ -333,7 +333,7 @@ static char *mem_fmt(char *buf, int size, unsigned long hsize)
	return buf;
}

int __init hugetlb_cgroup_file_init(int idx)
static void __init __hugetlb_cgroup_file_init(int idx)
{
	char buf[32];
	struct cftype *cft;

@@ -375,7 +375,22 @@ int __init hugetlb_cgroup_file_init(int idx)

	WARN_ON(cgroup_add_cftypes(&hugetlb_subsys, h->cgroup_files));

	return 0;
	return;
}

void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	for_each_hstate(h) {
		/*
		 * Add cgroup control files only if the huge page consists
		 * of more than two normal pages. This is because we use
		 * page[2].lru.next for storing cgroup details.
		 */
		if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
			__hugetlb_cgroup_file_init(hstate_index(h));
	}
}

/*
@@ -1556,7 +1556,8 @@ static int dump_str_object_info(const char *str)
	struct kmemleak_object *object;
	unsigned long addr;

	addr = simple_strtoul(str, NULL, 0);
	if (kstrtoul(str, 0, &addr))
		return -EINVAL;
	object = find_and_get_object(addr, 0);
	if (!object) {
		pr_info("Unknown object at 0x%08lx\n", addr);
mm/memcontrol.c (diff suppressed because it is too large)
@@ -590,18 +590,21 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
}

#ifdef CONFIG_MOVABLE_NODE
/* when CONFIG_MOVABLE_NODE, we allow online node don't have normal memory */
/*
 * When CONFIG_MOVABLE_NODE, we permit onlining of a node which doesn't have
 * normal memory.
 */
static bool can_online_high_movable(struct zone *zone)
{
	return true;
}
#else /* #ifdef CONFIG_MOVABLE_NODE */
#else /* CONFIG_MOVABLE_NODE */
/* ensure every online node has NORMAL memory */
static bool can_online_high_movable(struct zone *zone)
{
	return node_state(zone_to_nid(zone), N_NORMAL_MEMORY);
}
#endif /* #ifdef CONFIG_MOVABLE_NODE */
#endif /* CONFIG_MOVABLE_NODE */

/* check which state of node_states will be changed when online memory */
static void node_states_check_changes_online(unsigned long nr_pages,

@@ -1112,12 +1115,15 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
}

#ifdef CONFIG_MOVABLE_NODE
/* when CONFIG_MOVABLE_NODE, we allow online node don't have normal memory */
/*
 * When CONFIG_MOVABLE_NODE, we permit offlining of a node which doesn't have
 * normal memory.
 */
static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
{
	return true;
}
#else /* #ifdef CONFIG_MOVABLE_NODE */
#else /* CONFIG_MOVABLE_NODE */
/* ensure the node has NORMAL memory if it is still online */
static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
{

@@ -1141,7 +1147,7 @@ static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
	 */
	return present_pages == 0;
}
#endif /* #ifdef CONFIG_MOVABLE_NODE */
#endif /* CONFIG_MOVABLE_NODE */

/* check which state of node_states will be changed when offline memory */
static void node_states_check_changes_offline(unsigned long nr_pages,
@@ -114,7 +114,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,

#ifdef CONFIG_NUMA_BALANCING
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
				       pmd_t *pmd)
				       pmd_t *pmd)
{
	spin_lock(&mm->page_table_lock);
	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));

@@ -122,15 +122,15 @@ static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
}
#else
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
				       pmd_t *pmd)
				       pmd_t *pmd)
{
	BUG();
}
#endif /* CONFIG_NUMA_BALANCING */

static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
		pud_t *pud, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pmd_t *pmd;
	unsigned long next;

@@ -143,7 +143,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
		if (pmd_trans_huge(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE)
				split_huge_page_pmd(vma, addr, pmd);
			else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
			else if (change_huge_pmd(vma, pmd, addr, newprot,
						 prot_numa)) {
				pages += HPAGE_PMD_NR;
				continue;
			}

@@ -167,9 +168,9 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
	return pages;
}

static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
static inline unsigned long change_pud_range(struct vm_area_struct *vma,
		pgd_t *pgd, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pud_t *pud;
	unsigned long next;

@@ -304,7 +305,8 @@ success:
		dirty_accountable = 1;
	}

	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0);
	change_protection(vma, start, end, vma->vm_page_prot,
			  dirty_accountable, 0);

	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
	vm_stat_account(mm, newflags, vma->vm_file, nrpages);

@@ -361,8 +363,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSDOWN))
				goto out;
		}
		else {
		} else {
			if (vma->vm_start > start)
				goto out;
			if (unlikely(grows & PROT_GROWSUP)) {

@@ -378,9 +379,10 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
	for (nstart = start ; ; ) {
		unsigned long newflags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
		newflags = vm_flags;
		newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shift VM_MAY% in place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
@@ -371,8 +371,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
	int nr_pages = 1 << order;
	int bad = 0;

	if (unlikely(compound_order(page) != order) ||
			unlikely(!PageHead(page))) {
	if (unlikely(compound_order(page) != order)) {
		bad_page(page);
		bad++;
	}

@@ -2613,6 +2612,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
	int migratetype = allocflags_to_migratetype(gfp_mask);
	unsigned int cpuset_mems_cookie;
	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
	struct mem_cgroup *memcg = NULL;

	gfp_mask &= gfp_allowed_mask;

@@ -2631,6 +2631,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
	if (unlikely(!zonelist->_zonerefs->zone))
		return NULL;

	/*
	 * Will only have any effect when __GFP_KMEMCG is set.  This is
	 * verified in the (always inline) callee
	 */
	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
		return NULL;

retry_cpuset:
	cpuset_mems_cookie = get_mems_allowed();

@@ -2666,6 +2673,8 @@ out:
	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
		goto retry_cpuset;

	memcg_kmem_commit_charge(page, memcg, order);

	return page;
}
EXPORT_SYMBOL(__alloc_pages_nodemask);

@@ -2718,6 +2727,31 @@ void free_pages(unsigned long addr, unsigned int order)

EXPORT_SYMBOL(free_pages);

/*
 * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
 * pages allocated with __GFP_KMEMCG.
 *
 * Those pages are accounted to a particular memcg, embedded in the
 * corresponding page_cgroup. To avoid adding a hit in the allocator to search
 * for that information only to find out that it is NULL for users who have no
 * interest in that whatsoever, we provide these functions.
 *
 * The caller knows better which flags it relies on.
 */
void __free_memcg_kmem_pages(struct page *page, unsigned int order)
{
	memcg_kmem_uncharge_pages(page, order);
	__free_pages(page, order);
}

void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
{
	if (addr != 0) {
		VM_BUG_ON(!virt_addr_valid((void *)addr));
		__free_memcg_kmem_pages(virt_to_page((void *)addr), order);
	}
}

static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
{
	if (addr) {
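The rule these helpers encode is symmetry: any page obtained with __GFP_KMEMCG must be released through the *_memcg_kmem_pages variants so that the charge recorded in its page_cgroup is dropped. A hedged sketch of a caller (the buffer helpers are illustrative; the thread_info changes in kernel/fork.c above are the in-tree example of this pairing):

#include <linux/gfp.h>

/* Illustrative only: pair a __GFP_KMEMCG allocation with the memcg-aware free. */
static unsigned long alloc_accounted_buffer(unsigned int order)
{
	return __get_free_pages(GFP_KERNEL | __GFP_KMEMCG, order);
}

static void free_accounted_buffer(unsigned long addr, unsigned int order)
{
	free_memcg_kmem_pages(addr, order);	/* not plain free_pages() */
}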
mm/slab.c
@@ -87,7 +87,6 @@
 */

#include	<linux/slab.h>
#include	"slab.h"
#include	<linux/mm.h>
#include	<linux/poison.h>
#include	<linux/swap.h>

@@ -128,6 +127,8 @@

#include	"internal.h"

#include	"slab.h"

/*
 * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).

@@ -641,6 +642,26 @@ static void init_node_lock_keys(int q)
	}
}

static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
{
	struct kmem_list3 *l3;
	l3 = cachep->nodelists[q];
	if (!l3)
		return;

	slab_set_lock_classes(cachep, &on_slab_l3_key,
			&on_slab_alc_key, q);
}

static inline void on_slab_lock_classes(struct kmem_cache *cachep)
{
	int node;

	VM_BUG_ON(OFF_SLAB(cachep));
	for_each_node(node)
		on_slab_lock_classes_node(cachep, node);
}

static inline void init_lock_keys(void)
{
	int node;

@@ -657,6 +678,14 @@ static inline void init_lock_keys(void)
{
}

static inline void on_slab_lock_classes(struct kmem_cache *cachep)
{
}

static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
{
}

static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
}

@@ -1385,6 +1414,9 @@ static int __cpuinit cpuup_prepare(long cpu)
		free_alien_cache(alien);
		if (cachep->flags & SLAB_DEBUG_OBJECTS)
			slab_set_debugobj_lock_classes_node(cachep, node);
		else if (!OFF_SLAB(cachep) &&
			 !(cachep->flags & SLAB_DESTROY_BY_RCU))
			on_slab_lock_classes_node(cachep, node);
	}
	init_node_lock_keys(node);

@@ -1863,6 +1895,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
		if (page->pfmemalloc)
			SetPageSlabPfmemalloc(page + i);
	}
	memcg_bind_pages(cachep, cachep->gfporder);

	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);

@@ -1899,9 +1932,11 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
		__ClearPageSlab(page);
		page++;
	}

	memcg_release_pages(cachep, cachep->gfporder);
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += nr_freed;
	free_pages((unsigned long)addr, cachep->gfporder);
	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
}

static void kmem_rcu_free(struct rcu_head *head)

@@ -2489,7 +2524,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
		WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);

		slab_set_debugobj_lock_classes(cachep);
	}
	} else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
		on_slab_lock_classes(cachep);

	return 0;
}

@@ -3453,6 +3489,8 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
	if (slab_should_failslab(cachep, flags))
		return NULL;

	cachep = memcg_kmem_get_cache(cachep, flags);

	cache_alloc_debugcheck_before(cachep, flags);
	local_irq_save(save_flags);

@@ -3538,6 +3576,8 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
	if (slab_should_failslab(cachep, flags))
		return NULL;

	cachep = memcg_kmem_get_cache(cachep, flags);

	cache_alloc_debugcheck_before(cachep, flags);
	local_irq_save(save_flags);
	objp = __do_cache_alloc(cachep, flags);

@@ -3851,6 +3891,9 @@ EXPORT_SYMBOL(__kmalloc);
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
	unsigned long flags;
	cachep = cache_from_obj(cachep, objp);
	if (!cachep)
		return;

	local_irq_save(flags);
	debug_check_no_locks_freed(objp, cachep->object_size);

@@ -3998,7 +4041,7 @@ static void do_ccupdate_local(void *info)
}

/* Always called with the slab_mutex held */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct ccupdate_struct *new;

@@ -4041,12 +4084,49 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
	return alloc_kmemlist(cachep, gfp);
}

static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	int ret;
	struct kmem_cache *c = NULL;
	int i = 0;

	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);

	if (slab_state < FULL)
		return ret;

	if ((ret < 0) || !is_root_cache(cachep))
		return ret;

	VM_BUG_ON(!mutex_is_locked(&slab_mutex));
	for_each_memcg_cache_index(i) {
		c = cache_from_memcg(cachep, i);
		if (c)
			/* return value determined by the parent cache only */
			__do_tune_cpucache(c, limit, batchcount, shared, gfp);
	}

	return ret;
}

/* Called with slab_mutex held always */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int err;
	int limit, shared;
	int limit = 0;
	int shared = 0;
	int batchcount = 0;

	if (!is_root_cache(cachep)) {
		struct kmem_cache *root = memcg_root_cache(cachep);
		limit = root->limit;
		shared = root->shared;
		batchcount = root->batchcount;
	}

	if (limit && shared && batchcount)
		goto skip_setup;
	/*
	 * The head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm

@@ -4088,7 +4168,9 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
	if (limit > 32)
		limit = 32;
#endif
	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
	batchcount = (limit + 1) / 2;
skip_setup:
	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
	if (err)
		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
		       cachep->name, -err);
mm/slab.h (137 lines changed)
@@ -43,12 +43,15 @@ extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
extern void create_boot_cache(struct kmem_cache *, const char *name,
size_t size, unsigned long flags);

struct mem_cgroup;
#ifdef CONFIG_SLUB
struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *));
struct kmem_cache *
__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *));
#else
static inline struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *))
static inline struct kmem_cache *
__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *))
{ return NULL; }
#endif

@@ -100,4 +103,130 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos);

#ifdef CONFIG_MEMCG_KMEM
static inline bool is_root_cache(struct kmem_cache *s)
{
return !s->memcg_params || s->memcg_params->is_root_cache;
}

static inline bool cache_match_memcg(struct kmem_cache *cachep,
struct mem_cgroup *memcg)
{
return (is_root_cache(cachep) && !memcg) ||
(cachep->memcg_params->memcg == memcg);
}

static inline void memcg_bind_pages(struct kmem_cache *s, int order)
{
if (!is_root_cache(s))
atomic_add(1 << order, &s->memcg_params->nr_pages);
}

static inline void memcg_release_pages(struct kmem_cache *s, int order)
{
if (is_root_cache(s))
return;

if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
mem_cgroup_destroy_cache(s);
}

static inline bool slab_equal_or_root(struct kmem_cache *s,
struct kmem_cache *p)
{
return (p == s) ||
(s->memcg_params && (p == s->memcg_params->root_cache));
}

/*
* We use suffixes to the name in memcg because we can't have caches
* created in the system with the same name. But when we print them
* locally, better refer to them with the base name
*/
static inline const char *cache_name(struct kmem_cache *s)
{
if (!is_root_cache(s))
return s->memcg_params->root_cache->name;
return s->name;
}

static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
{
return s->memcg_params->memcg_caches[idx];
}

static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
{
if (is_root_cache(s))
return s;
return s->memcg_params->root_cache;
}
#else
static inline bool is_root_cache(struct kmem_cache *s)
{
return true;
}

static inline bool cache_match_memcg(struct kmem_cache *cachep,
struct mem_cgroup *memcg)
{
return true;
}

static inline void memcg_bind_pages(struct kmem_cache *s, int order)
{
}

static inline void memcg_release_pages(struct kmem_cache *s, int order)
{
}

static inline bool slab_equal_or_root(struct kmem_cache *s,
struct kmem_cache *p)
{
return true;
}

static inline const char *cache_name(struct kmem_cache *s)
{
return s->name;
}

static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
{
return NULL;
}

static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
{
return s;
}
#endif

static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
struct kmem_cache *cachep;
struct page *page;

/*
* When kmemcg is not being used, both assignments should return the
* same value. but we don't want to pay the assignment price in that
* case. If it is not compiled in, the compiler should be smart enough
* to not do even the assignment. In that case, slab_equal_or_root
* will also be a constant.
*/
if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
return s;

page = virt_to_head_page(x);
cachep = page->slab_cache;
if (slab_equal_or_root(cachep, s))
return cachep;

pr_err("%s: Wrong slab cache. %s but object is from %s\n",
__FUNCTION__, cachep->name, s->name);
WARN_ON_ONCE(1);
return s;
}
#endif
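
For illustration only, not part of the commit: the free paths in mm/slab.c and mm/slub.c below both lean on the cache_from_obj() helper above to translate whatever cache pointer the caller passed into the cache that actually owns the object's page, which may be a per-memcg child cache. A condensed sketch of that pattern with a made-up wrapper name (example_cache_free); the callees are the ones appearing in this diff:

	/* Sketch of the common free-path pattern introduced by this series;
	 * example_cache_free() is hypothetical, the callees are from the diff. */
	void example_cache_free(struct kmem_cache *s, void *x)
	{
		s = cache_from_obj(s, x);	/* may resolve to a memcg child cache */
		if (!s)				/* defensive check, as in the mm/slab.c hunk */
			return;
		slab_free(s, virt_to_head_page(x), x, _RET_IP_);
	}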
mm/slab_common.c (118 lines changed)
@@ -18,6 +18,7 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#include "slab.h"

@@ -27,7 +28,8 @@ DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, size_t size)
static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
size_t size)
{
struct kmem_cache *s = NULL;

@@ -53,7 +55,13 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
continue;
}

if (!strcmp(s->name, name)) {
/*
* For simplicity, we won't check this in the list of memcg
* caches. We have control over memcg naming, and if there
* aren't duplicates in the global list, there won't be any
* duplicates in the memcg lists as well.
*/
if (!memcg && !strcmp(s->name, name)) {
pr_err("%s (%s): Cache name already exists.\n",
__func__, name);
dump_stack();
@@ -66,12 +74,41 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, size_t size)
static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
const char *name, size_t size)
{
return 0;
}
#endif

#ifdef CONFIG_MEMCG_KMEM
int memcg_update_all_caches(int num_memcgs)
{
struct kmem_cache *s;
int ret = 0;
mutex_lock(&slab_mutex);

list_for_each_entry(s, &slab_caches, list) {
if (!is_root_cache(s))
continue;

ret = memcg_update_cache_size(s, num_memcgs);
/*
* See comment in memcontrol.c, memcg_update_cache_size:
* Instead of freeing the memory, we'll just leave the caches
* up to this point in an updated state.
*/
if (ret)
goto out;
}

memcg_update_array_size(num_memcgs);
out:
mutex_unlock(&slab_mutex);
return ret;
}
#endif

/*
* Figure out what the alignment of the objects will be given a set of
* flags, a user specified alignment and the size of the objects.
@@ -125,8 +162,10 @@ unsigned long calculate_alignment(unsigned long flags,
* as davem.
*/

struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
unsigned long flags, void (*ctor)(void *))
struct kmem_cache *
kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *),
struct kmem_cache *parent_cache)
{
struct kmem_cache *s = NULL;
int err = 0;
@@ -134,7 +173,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
get_online_cpus();
mutex_lock(&slab_mutex);

if (!kmem_cache_sanity_check(name, size) == 0)
if (!kmem_cache_sanity_check(memcg, name, size) == 0)
goto out_locked;

/*
@@ -145,7 +184,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
*/
flags &= CACHE_CREATE_MASK;

s = __kmem_cache_alias(name, size, align, flags, ctor);
s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
if (s)
goto out_locked;

@@ -154,6 +193,13 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
s->object_size = s->size = size;
s->align = calculate_alignment(flags, align, size);
s->ctor = ctor;

if (memcg_register_cache(memcg, s, parent_cache)) {
kmem_cache_free(kmem_cache, s);
err = -ENOMEM;
goto out_locked;
}

s->name = kstrdup(name, GFP_KERNEL);
if (!s->name) {
kmem_cache_free(kmem_cache, s);
@@ -163,10 +209,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align

err = __kmem_cache_create(s, flags);
if (!err) {

s->refcount = 1;
list_add(&s->list, &slab_caches);

memcg_cache_list_add(memcg, s);
} else {
kfree(s->name);
kmem_cache_free(kmem_cache, s);
@@ -194,10 +239,20 @@ out_locked:

return s;
}

struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
unsigned long flags, void (*ctor)(void *))
{
return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
}
EXPORT_SYMBOL(kmem_cache_create);

void kmem_cache_destroy(struct kmem_cache *s)
{
/* Destroy all the children caches if we aren't a memcg cache */
kmem_cache_destroy_memcg_children(s);

get_online_cpus();
mutex_lock(&slab_mutex);
s->refcount--;
@@ -209,6 +264,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
if (s->flags & SLAB_DESTROY_BY_RCU)
rcu_barrier();

memcg_release_cache(s);
kfree(s->name);
kmem_cache_free(kmem_cache, s);
} else {
@@ -267,7 +323,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,


#ifdef CONFIG_SLABINFO
static void print_slabinfo_header(struct seq_file *m)
void print_slabinfo_header(struct seq_file *m)
{
/*
* Output format version, so at least we can change it
@@ -311,16 +367,43 @@ static void s_stop(struct seq_file *m, void *p)
mutex_unlock(&slab_mutex);
}

static int s_show(struct seq_file *m, void *p)
static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
struct kmem_cache *c;
struct slabinfo sinfo;
int i;

if (!is_root_cache(s))
return;

for_each_memcg_cache_index(i) {
c = cache_from_memcg(s, i);
if (!c)
continue;

memset(&sinfo, 0, sizeof(sinfo));
get_slabinfo(c, &sinfo);

info->active_slabs += sinfo.active_slabs;
info->num_slabs += sinfo.num_slabs;
info->shared_avail += sinfo.shared_avail;
info->active_objs += sinfo.active_objs;
info->num_objs += sinfo.num_objs;
}
}

int cache_show(struct kmem_cache *s, struct seq_file *m)
{
struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
struct slabinfo sinfo;

memset(&sinfo, 0, sizeof(sinfo));
get_slabinfo(s, &sinfo);

memcg_accumulate_slabinfo(s, &sinfo);

seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
s->name, sinfo.active_objs, sinfo.num_objs, s->size,
cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
sinfo.objects_per_slab, (1 << sinfo.cache_order));

seq_printf(m, " : tunables %4u %4u %4u",
@@ -332,6 +415,15 @@ static int s_show(struct seq_file *m, void *p)
return 0;
}

static int s_show(struct seq_file *m, void *p)
{
struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

if (!is_root_cache(s))
return 0;
return cache_show(s, m);
}

/*
* slabinfo_op - iterator that generates /proc/slabinfo
*
mm/slob.c
@@ -58,7 +58,6 @@

#include <linux/kernel.h>
#include <linux/slab.h>
#include "slab.h"

#include <linux/mm.h>
#include <linux/swap.h> /* struct reclaim_state */
@@ -73,6 +72,7 @@

#include <linux/atomic.h>

#include "slab.h"
/*
* slob_block has a field 'units', which indicates size of block if +ve,
* or offset of next block if -ve (in SLOB_UNITs).
mm/slub.c (150 lines changed)
@@ -31,6 +31,7 @@
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>

#include <trace/events/kmem.h>

@@ -200,13 +201,14 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

static void memcg_propagate_slab_attrs(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s) { }

static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
#endif

static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -1343,6 +1345,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
void *start;
void *last;
void *p;
int order;

BUG_ON(flags & GFP_SLAB_BUG_MASK);

@@ -1351,7 +1354,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
if (!page)
goto out;

order = compound_order(page);
inc_slabs_node(s, page_to_nid(page), page->objects);
memcg_bind_pages(s, order);
page->slab_cache = s;
__SetPageSlab(page);
if (page->pfmemalloc)
@@ -1360,7 +1365,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
start = page_address(page);

if (unlikely(s->flags & SLAB_POISON))
memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
memset(start, POISON_INUSE, PAGE_SIZE << order);

last = start;
for_each_object(p, s, start, page->objects) {
@@ -1401,10 +1406,12 @@ static void __free_slab(struct kmem_cache *s, struct page *page)

__ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);

memcg_release_pages(s, order);
reset_page_mapcount(page);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
__free_pages(page, order);
__free_memcg_kmem_pages(page, order);
}

#define need_reserve_slab_rcu \
@@ -2322,6 +2329,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
if (slab_pre_alloc_hook(s, gfpflags))
return NULL;

s = memcg_kmem_get_cache(s, gfpflags);
redo:

/*
@@ -2610,19 +2618,10 @@ redo:

void kmem_cache_free(struct kmem_cache *s, void *x)
{
struct page *page;

page = virt_to_head_page(x);

if (kmem_cache_debug(s) && page->slab_cache != s) {
pr_err("kmem_cache_free: Wrong slab cache. %s but object"
" is from %s\n", page->slab_cache->name, s->name);
WARN_ON_ONCE(1);
s = cache_from_obj(s, x);
if (!s)
return;
}

slab_free(s, page, x, _RET_IP_);

slab_free(s, virt_to_head_page(x), x, _RET_IP_);
trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3154,8 +3153,19 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
{
int rc = kmem_cache_close(s);

if (!rc)
if (!rc) {
/*
* We do the same lock strategy around sysfs_slab_add, see
* __kmem_cache_create. Because this is pretty much the last
* operation we do and the lock will be released shortly after
* that in slab_common.c, we could just move sysfs_slab_remove
* to a later point in common code. We should do that when we
* have a common sysfs framework for all allocators.
*/
mutex_unlock(&slab_mutex);
sysfs_slab_remove(s);
mutex_lock(&slab_mutex);
}

return rc;
}
@@ -3292,7 +3302,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;

flags |= __GFP_COMP | __GFP_NOTRACK;
flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
page = alloc_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3398,7 +3408,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kmemleak_free(x);
__free_pages(page, compound_order(page));
__free_memcg_kmem_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, _RET_IP_);
@@ -3786,7 +3796,7 @@ static int slab_unmergeable(struct kmem_cache *s)
return 0;
}

static struct kmem_cache *find_mergeable(size_t size,
static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
size_t align, unsigned long flags, const char *name,
void (*ctor)(void *))
{
@@ -3822,17 +3832,21 @@ static struct kmem_cache *find_mergeable(size_t size,
if (s->size - size >= sizeof(void *))
continue;

if (!cache_match_memcg(s, memcg))
continue;

return s;
}
return NULL;
}

struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *))
struct kmem_cache *
__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
size_t align, unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *s;

s = find_mergeable(size, align, flags, name, ctor);
s = find_mergeable(memcg, size, align, flags, name, ctor);
if (s) {
s->refcount++;
/*
@@ -3863,6 +3877,7 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
if (slab_state <= UP)
return 0;

memcg_propagate_slab_attrs(s);
mutex_unlock(&slab_mutex);
err = sysfs_slab_add(s);
mutex_lock(&slab_mutex);
@@ -5096,10 +5111,95 @@ static ssize_t slab_attr_store(struct kobject *kobj,
return -EIO;

err = attribute->store(s, buf, len);
#ifdef CONFIG_MEMCG_KMEM
if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
int i;

mutex_lock(&slab_mutex);
if (s->max_attr_size < len)
s->max_attr_size = len;

/*
* This is a best effort propagation, so this function's return
* value will be determined by the parent cache only. This is
* basically because not all attributes will have a well
* defined semantics for rollbacks - most of the actions will
* have permanent effects.
*
* Returning the error value of any of the children that fail
* is not 100 % defined, in the sense that users seeing the
* error code won't be able to know anything about the state of
* the cache.
*
* Only returning the error code for the parent cache at least
* has well defined semantics. The cache being written to
* directly either failed or succeeded, in which case we loop
* through the descendants with best-effort propagation.
*/
for_each_memcg_cache_index(i) {
struct kmem_cache *c = cache_from_memcg(s, i);
if (c)
attribute->store(c, buf, len);
}
mutex_unlock(&slab_mutex);
}
#endif
return err;
}

static void memcg_propagate_slab_attrs(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG_KMEM
int i;
char *buffer = NULL;

if (!is_root_cache(s))
return;

/*
* This mean this cache had no attribute written. Therefore, no point
* in copying default values around
*/
if (!s->max_attr_size)
return;

for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
char mbuf[64];
char *buf;
struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);

if (!attr || !attr->store || !attr->show)
continue;

/*
* It is really bad that we have to allocate here, so we will
* do it only as a fallback. If we actually allocate, though,
* we can just use the allocated buffer until the end.
*
* Most of the slub attributes will tend to be very small in
* size, but sysfs allows buffers up to a page, so they can
* theoretically happen.
*/
if (buffer)
buf = buffer;
else if (s->max_attr_size < ARRAY_SIZE(mbuf))
buf = mbuf;
else {
buffer = (char *) get_zeroed_page(GFP_KERNEL);
if (WARN_ON(!buffer))
continue;
buf = buffer;
}

attr->show(s->memcg_params->root_cache, buf);
attr->store(s, buf, strlen(buf));
}

if (buffer)
free_page((unsigned long)buffer);
#endif
}

static const struct sysfs_ops slab_sysfs_ops = {
.show = slab_attr_show,
.store = slab_attr_store,
@@ -5156,6 +5256,12 @@ static char *create_unique_id(struct kmem_cache *s)
if (p != name + 1)
*p++ = '-';
p += sprintf(p, "%07d", s->size);

#ifdef CONFIG_MEMCG_KMEM
if (!is_root_cache(s))
p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
#endif

BUG_ON(p > name + ID_STR_LENGTH - 1);
return name;
}
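
For illustration only, not part of the commit: slab_attr_store() and memcg_propagate_slab_attrs() above share one idea, namely pushing a tunable through the attribute's own ->show()/->store() callbacks so the same value reaches every per-memcg child of a root cache, best effort and with only the root's result reported. A condensed sketch of that loop with a made-up helper name; the identifiers it calls are the ones introduced in this diff:

	/* Hypothetical helper: replay one sysfs attribute of a root cache onto
	 * all of its memcg children; failures of children are ignored. */
	static void propagate_one_attr(struct kmem_cache *root,
				       struct slab_attribute *attr, char *buf)
	{
		int i;

		attr->show(root, buf);		/* serialize the root's current value */
		for_each_memcg_cache_index(i) {
			struct kmem_cache *c = cache_from_memcg(root, i);

			if (c)
				attr->store(c, buf, strlen(buf));
		}
	}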
mm/vmscan.c (14 lines changed)
@@ -1177,7 +1177,11 @@ int isolate_lru_page(struct page *page)
}

/*
* Are there way too many processes in the direct reclaim path already?
* A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
* then get resheduled. When there are massive number of tasks doing page
* allocation, such sleeping direct reclaimers may keep piling up on each CPU,
* the LRU list will go small and be scanned faster than necessary, leading to
* unnecessary swapping, thrashing and OOM.
*/
static int too_many_isolated(struct zone *zone, int file,
struct scan_control *sc)
@@ -1198,6 +1202,14 @@ static int too_many_isolated(struct zone *zone, int file,
isolated = zone_page_state(zone, NR_ISOLATED_ANON);
}

/*
* GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
* won't get blocked by normal direct-reclaimers, forming a circular
* deadlock.
*/
if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS)
inactive >>= 3;

return isolated > inactive;
}
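
For illustration only, not part of the commit: the new comments describe a throttling heuristic whose effect is easier to see in isolation. A self-contained model of the check, with plain integers standing in for the zone counters; the names are illustrative and only the shift and the comparison mirror the hunk:

	#include <stdbool.h>

	/* Models too_many_isolated(): callers that may do I/O and filesystem
	 * writeback (GFP_IOFS) get an eight-times smaller threshold and are
	 * throttled sooner, while GFP_NOIO/GFP_NOFS reclaimers keep the full
	 * threshold so they never pile up behind them (the deadlock case). */
	static bool too_many_isolated_model(long isolated, long inactive,
					    bool gfp_iofs_capable)
	{
		if (gfp_iofs_capable)
			inactive >>= 3;
		return isolated > inactive;
	}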
scripts/coccinelle/api/d_find_alias.cocci (new file, 80 lines)
@@ -0,0 +1,80 @@
/// Make sure calls to d_find_alias() have a corresponding call to dput().
//
// Keywords: d_find_alias, dput
//
// Confidence: Moderate
// URL: http://coccinelle.lip6.fr/
// Options: -include_headers

virtual context
virtual org
virtual patch
virtual report

@r exists@
local idexpression struct dentry *dent;
expression E, E1;
statement S1, S2;
position p1, p2;
@@
(
if (!(dent@p1 = d_find_alias(...))) S1
|
dent@p1 = d_find_alias(...)
)

<...when != dput(dent)
when != if (...) { <+... dput(dent) ...+> }
when != true !dent || ...
when != dent = E
when != E = dent
if (!dent || ...) S2
...>
(
return <+...dent...+>;
|
return @p2 ...;
|
dent@p2 = E1;
|
E1 = dent;
)

@depends on context@
local idexpression struct dentry *r.dent;
position r.p1,r.p2;
@@
* dent@p1 = ...
...
(
* return@p2 ...;
|
* dent@p2
)


@script:python depends on org@
p1 << r.p1;
p2 << r.p2;
@@
cocci.print_main("Missing call to dput()",p1)
cocci.print_secs("",p2)

@depends on patch@
local idexpression struct dentry *r.dent;
position r.p2;
@@
(
+ dput(dent);
return @p2 ...;
|
+ dput(dent);
dent@p2 = ...;
)

@script:python depends on report@
p1 << r.p1;
p2 << r.p2;
@@
msg = "Missing call to dput() at line %s."
coccilib.report.print_report(p1[0], msg % (p2[0].line))
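
For illustration only, not part of the commit: the class of bug the new semantic patch reports. d_find_alias() returns a dentry with an elevated reference count, and every exit path must drop it with dput(). The function below is hypothetical; d_find_alias(), dput() and d_unhashed() are the real kernel APIs involved:

	/* Hypothetical example of the leak d_find_alias.cocci would flag. */
	static int example_check_alias(struct inode *inode)
	{
		struct dentry *dent = d_find_alias(inode);

		if (!dent)
			return -ENOENT;
		if (d_unhashed(dent))
			return -ESTALE;	/* leak: returns without dput(dent) */

		dput(dent);		/* correct exit path drops the reference */
		return 0;
	}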