forked from Minki/linux
943a451a87
SLAB allows us to tune a particular cache's behavior with tunables. When creating a new memcg cache copy, we'd like to preserve any tunables the parent cache already had. This could be done by an explicit call to do_tune_cpucache() after the cache is created. But this is not very convenient now that the caches are created from common code, since this function is SLAB-specific. Another method of doing that is taking advantage of the fact that do_tune_cpucache() is always called from enable_cpucache(), which is called at cache initialization. We can just preset the values, and then things work as expected. It can also happen that a root cache has its tunables updated during normal system operation. In this case, we will propagate the change to all caches that are already active. This change will require us to move the assignment of root_cache in memcg_params a bit earlier. We need this to be already set - which memcg_kmem_register_cache will do - when we reach __kmem_cache_create(). Signed-off-by: Glauber Costa <glommer@parallels.com> Cc: Christoph Lameter <cl@linux.com> Cc: David Rientjes <rientjes@google.com> Cc: Frederic Weisbecker <fweisbec@redhat.com> Cc: Greg Thelen <gthelen@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: JoonSoo Kim <js1304@gmail.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Michal Hocko <mhocko@suse.cz> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Rik van Riel <riel@redhat.com> Cc: Suleiman Souhlal <suleiman@google.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
467 lines
11 KiB
C
467 lines
11 KiB
C
/*
|
|
* Slab allocator functions that are independent of the allocator strategy
|
|
*
|
|
* (C) 2012 Christoph Lameter <cl@linux.com>
|
|
*/
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/poison.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/memory.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/module.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/page.h>
|
|
#include <linux/memcontrol.h>
|
|
|
|
#include "slab.h"
|
|
|
|
/* Allocator bring-up state; slab_is_available() compares it against UP. */
enum slab_state slab_state;

/* Every cache registered by this file, linked through kmem_cache.list. */
LIST_HEAD(slab_caches);

/* Taken in this file around every walk or modification of slab_caches. */
DEFINE_MUTEX(slab_mutex);

/* The cache that struct kmem_cache descriptors are allocated from. */
struct kmem_cache *kmem_cache;
|
|
|
|
#ifdef CONFIG_DEBUG_VM
|
|
static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
|
|
size_t size)
|
|
{
|
|
struct kmem_cache *s = NULL;
|
|
|
|
if (!name || in_interrupt() || size < sizeof(void *) ||
|
|
size > KMALLOC_MAX_SIZE) {
|
|
pr_err("kmem_cache_create(%s) integrity check failed\n", name);
|
|
return -EINVAL;
|
|
}
|
|
|
|
list_for_each_entry(s, &slab_caches, list) {
|
|
char tmp;
|
|
int res;
|
|
|
|
/*
|
|
* This happens when the module gets unloaded and doesn't
|
|
* destroy its slab cache and no-one else reuses the vmalloc
|
|
* area of the module. Print a warning.
|
|
*/
|
|
res = probe_kernel_address(s->name, tmp);
|
|
if (res) {
|
|
pr_err("Slab cache with size %d has lost its name\n",
|
|
s->object_size);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* For simplicity, we won't check this in the list of memcg
|
|
* caches. We have control over memcg naming, and if there
|
|
* aren't duplicates in the global list, there won't be any
|
|
* duplicates in the memcg lists as well.
|
|
*/
|
|
if (!memcg && !strcmp(s->name, name)) {
|
|
pr_err("%s (%s): Cache name already exists.\n",
|
|
__func__, name);
|
|
dump_stack();
|
|
s = NULL;
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
WARN_ON(strchr(name, ' ')); /* It confuses parsers */
|
|
return 0;
|
|
}
|
|
#else
|
|
/*
 * Variant built when CONFIG_DEBUG_VM is not set: performs no checks and
 * accepts every request by returning 0.
 */
static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
					  const char *name, size_t size)
{
	return 0;
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_MEMCG_KMEM
|
|
int memcg_update_all_caches(int num_memcgs)
|
|
{
|
|
struct kmem_cache *s;
|
|
int ret = 0;
|
|
mutex_lock(&slab_mutex);
|
|
|
|
list_for_each_entry(s, &slab_caches, list) {
|
|
if (!is_root_cache(s))
|
|
continue;
|
|
|
|
ret = memcg_update_cache_size(s, num_memcgs);
|
|
/*
|
|
* See comment in memcontrol.c, memcg_update_cache_size:
|
|
* Instead of freeing the memory, we'll just leave the caches
|
|
* up to this point in an updated state.
|
|
*/
|
|
if (ret)
|
|
goto out;
|
|
}
|
|
|
|
memcg_update_array_size(num_memcgs);
|
|
out:
|
|
mutex_unlock(&slab_mutex);
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Figure out what the alignment of the objects will be given a set of
|
|
* flags, a user specified alignment and the size of the objects.
|
|
*/
|
|
unsigned long calculate_alignment(unsigned long flags,
|
|
unsigned long align, unsigned long size)
|
|
{
|
|
/*
|
|
* If the user wants hardware cache aligned objects then follow that
|
|
* suggestion if the object is sufficiently large.
|
|
*
|
|
* The hardware cache alignment cannot override the specified
|
|
* alignment though. If that is greater then use it.
|
|
*/
|
|
if (flags & SLAB_HWCACHE_ALIGN) {
|
|
unsigned long ralign = cache_line_size();
|
|
while (size <= ralign / 2)
|
|
ralign /= 2;
|
|
align = max(align, ralign);
|
|
}
|
|
|
|
if (align < ARCH_SLAB_MINALIGN)
|
|
align = ARCH_SLAB_MINALIGN;
|
|
|
|
return ALIGN(align, sizeof(void *));
|
|
}
|
|
|
|
|
|
/*
|
|
* kmem_cache_create - Create a cache.
|
|
* @name: A string which is used in /proc/slabinfo to identify this cache.
|
|
* @size: The size of objects to be created in this cache.
|
|
* @align: The required alignment for the objects.
|
|
* @flags: SLAB flags
|
|
* @ctor: A constructor for the objects.
|
|
*
|
|
* Returns a ptr to the cache on success, NULL on failure.
|
|
* Cannot be called within a interrupt, but can be interrupted.
|
|
* The @ctor is run when new pages are allocated by the cache.
|
|
*
|
|
* The flags are
|
|
*
|
|
* %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
|
|
* to catch references to uninitialised memory.
|
|
*
|
|
* %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
|
|
* for buffer overruns.
|
|
*
|
|
* %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
|
|
* cacheline. This can be beneficial if you're counting cycles as closely
|
|
* as davem.
|
|
*/
|
|
|
|
struct kmem_cache *
|
|
kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
|
|
size_t align, unsigned long flags, void (*ctor)(void *),
|
|
struct kmem_cache *parent_cache)
|
|
{
|
|
struct kmem_cache *s = NULL;
|
|
int err = 0;
|
|
|
|
get_online_cpus();
|
|
mutex_lock(&slab_mutex);
|
|
|
|
if (!kmem_cache_sanity_check(memcg, name, size) == 0)
|
|
goto out_locked;
|
|
|
|
/*
|
|
* Some allocators will constraint the set of valid flags to a subset
|
|
* of all flags. We expect them to define CACHE_CREATE_MASK in this
|
|
* case, and we'll just provide them with a sanitized version of the
|
|
* passed flags.
|
|
*/
|
|
flags &= CACHE_CREATE_MASK;
|
|
|
|
s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
|
|
if (s)
|
|
goto out_locked;
|
|
|
|
s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
|
|
if (s) {
|
|
s->object_size = s->size = size;
|
|
s->align = calculate_alignment(flags, align, size);
|
|
s->ctor = ctor;
|
|
|
|
if (memcg_register_cache(memcg, s, parent_cache)) {
|
|
kmem_cache_free(kmem_cache, s);
|
|
err = -ENOMEM;
|
|
goto out_locked;
|
|
}
|
|
|
|
s->name = kstrdup(name, GFP_KERNEL);
|
|
if (!s->name) {
|
|
kmem_cache_free(kmem_cache, s);
|
|
err = -ENOMEM;
|
|
goto out_locked;
|
|
}
|
|
|
|
err = __kmem_cache_create(s, flags);
|
|
if (!err) {
|
|
s->refcount = 1;
|
|
list_add(&s->list, &slab_caches);
|
|
memcg_cache_list_add(memcg, s);
|
|
} else {
|
|
kfree(s->name);
|
|
kmem_cache_free(kmem_cache, s);
|
|
}
|
|
} else
|
|
err = -ENOMEM;
|
|
|
|
out_locked:
|
|
mutex_unlock(&slab_mutex);
|
|
put_online_cpus();
|
|
|
|
if (err) {
|
|
|
|
if (flags & SLAB_PANIC)
|
|
panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
|
|
name, err);
|
|
else {
|
|
printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d",
|
|
name, err);
|
|
dump_stack();
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
return s;
|
|
}
|
|
|
|
struct kmem_cache *
|
|
kmem_cache_create(const char *name, size_t size, size_t align,
|
|
unsigned long flags, void (*ctor)(void *))
|
|
{
|
|
return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
|
|
}
|
|
EXPORT_SYMBOL(kmem_cache_create);
|
|
|
|
void kmem_cache_destroy(struct kmem_cache *s)
|
|
{
|
|
/* Destroy all the children caches if we aren't a memcg cache */
|
|
kmem_cache_destroy_memcg_children(s);
|
|
|
|
get_online_cpus();
|
|
mutex_lock(&slab_mutex);
|
|
s->refcount--;
|
|
if (!s->refcount) {
|
|
list_del(&s->list);
|
|
|
|
if (!__kmem_cache_shutdown(s)) {
|
|
mutex_unlock(&slab_mutex);
|
|
if (s->flags & SLAB_DESTROY_BY_RCU)
|
|
rcu_barrier();
|
|
|
|
memcg_release_cache(s);
|
|
kfree(s->name);
|
|
kmem_cache_free(kmem_cache, s);
|
|
} else {
|
|
list_add(&s->list, &slab_caches);
|
|
mutex_unlock(&slab_mutex);
|
|
printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n",
|
|
s->name);
|
|
dump_stack();
|
|
}
|
|
} else {
|
|
mutex_unlock(&slab_mutex);
|
|
}
|
|
put_online_cpus();
|
|
}
|
|
EXPORT_SYMBOL(kmem_cache_destroy);
|
|
|
|
int slab_is_available(void)
|
|
{
|
|
return slab_state >= UP;
|
|
}
|
|
|
|
#ifndef CONFIG_SLOB
|
|
/* Create a cache during boot when no slab services are available yet */
|
|
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
|
|
unsigned long flags)
|
|
{
|
|
int err;
|
|
|
|
s->name = name;
|
|
s->size = s->object_size = size;
|
|
s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
|
|
err = __kmem_cache_create(s, flags);
|
|
|
|
if (err)
|
|
panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n",
|
|
name, size, err);
|
|
|
|
s->refcount = -1; /* Exempt from merging for now */
|
|
}
|
|
|
|
/*
 * Allocate a cache descriptor with GFP_NOWAIT, set it up through
 * create_boot_cache(), add it to slab_caches and set its refcount to 1.
 *
 * Panics if the descriptor cannot be allocated. Returns the new cache.
 */
struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *cache;

	cache = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
	if (cache == NULL)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(cache, name, size, flags);
	list_add(&cache->list, &slab_caches);
	/* Overrides the refcount of -1 that create_boot_cache() left behind. */
	cache->refcount = 1;
	return cache;
}
|
|
|
|
#endif /* !CONFIG_SLOB */
|
|
|
|
|
|
#ifdef CONFIG_SLABINFO
|
|
/*
 * Emit the slabinfo header into @m: a version line followed by one line
 * naming the columns. The statistics columns are only named when
 * CONFIG_DEBUG_SLAB is set.
 */
void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	const char *version = "slabinfo - version: 2.1 (statistics)\n";
#else
	const char *version = "slabinfo - version: 2.1\n";
#endif

	seq_puts(m, version);

	/* Column names, in the order cache_show() prints the values. */
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
		    "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		    "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}
|
|
|
|
/*
 * seq_file ->start: take slab_mutex (dropped again in s_stop()), print the
 * header when iteration begins at position 0, and return the slab_caches
 * entry for *pos.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&slab_mutex);

	if (*pos == 0)
		print_slabinfo_header(m);

	return seq_list_start(&slab_caches, *pos);
}
|
|
|
|
/* seq_file ->next: step from @p to the following entry of slab_caches. */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	void *next_entry = seq_list_next(p, &slab_caches, pos);

	return next_entry;
}
|
|
|
|
static void s_stop(struct seq_file *m, void *p)
|
|
{
|
|
mutex_unlock(&slab_mutex);
|
|
}
|
|
|
|
static void
|
|
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
|
|
{
|
|
struct kmem_cache *c;
|
|
struct slabinfo sinfo;
|
|
int i;
|
|
|
|
if (!is_root_cache(s))
|
|
return;
|
|
|
|
for_each_memcg_cache_index(i) {
|
|
c = cache_from_memcg(s, i);
|
|
if (!c)
|
|
continue;
|
|
|
|
memset(&sinfo, 0, sizeof(sinfo));
|
|
get_slabinfo(c, &sinfo);
|
|
|
|
info->active_slabs += sinfo.active_slabs;
|
|
info->num_slabs += sinfo.num_slabs;
|
|
info->shared_avail += sinfo.shared_avail;
|
|
info->active_objs += sinfo.active_objs;
|
|
info->num_objs += sinfo.num_objs;
|
|
}
|
|
}
|
|
|
|
int cache_show(struct kmem_cache *s, struct seq_file *m)
|
|
{
|
|
struct slabinfo sinfo;
|
|
|
|
memset(&sinfo, 0, sizeof(sinfo));
|
|
get_slabinfo(s, &sinfo);
|
|
|
|
memcg_accumulate_slabinfo(s, &sinfo);
|
|
|
|
seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
|
|
cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
|
|
sinfo.objects_per_slab, (1 << sinfo.cache_order));
|
|
|
|
seq_printf(m, " : tunables %4u %4u %4u",
|
|
sinfo.limit, sinfo.batchcount, sinfo.shared);
|
|
seq_printf(m, " : slabdata %6lu %6lu %6lu",
|
|
sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
|
|
slabinfo_show_stats(m, s);
|
|
seq_putc(m, '\n');
|
|
return 0;
|
|
}
|
|
|
|
static int s_show(struct seq_file *m, void *p)
|
|
{
|
|
struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
|
|
|
|
if (!is_root_cache(s))
|
|
return 0;
|
|
return cache_show(s, m);
|
|
}
|
|
|
|
/*
|
|
* slabinfo_op - iterator that generates /proc/slabinfo
|
|
*
|
|
* Output layout:
|
|
* cache-name
|
|
* num-active-objs
|
|
* total-objs
|
|
* object size
|
|
* num-active-slabs
|
|
* total-slabs
|
|
* num-pages-per-slab
|
|
* + further values on SMP and with statistics enabled
|
|
*/
|
|
static const struct seq_operations slabinfo_op = {
|
|
.start = s_start,
|
|
.next = s_next,
|
|
.stop = s_stop,
|
|
.show = s_show,
|
|
};
|
|
|
|
static int slabinfo_open(struct inode *inode, struct file *file)
|
|
{
|
|
return seq_open(file, &slabinfo_op);
|
|
}
|
|
|
|
static const struct file_operations proc_slabinfo_operations = {
|
|
.open = slabinfo_open,
|
|
.read = seq_read,
|
|
.write = slabinfo_write,
|
|
.llseek = seq_lseek,
|
|
.release = seq_release,
|
|
};
|
|
|
|
/*
 * Register the "slabinfo" proc entry with mode S_IRUSR. The result of
 * proc_create() is not checked; this always returns 0.
 */
static int __init slab_proc_init(void)
{
	proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);

	return 0;
}
|
|
module_init(slab_proc_init);
|
|
#endif /* CONFIG_SLABINFO */
|