Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  slub: disallow changing cpu_partial from userspace for debug caches
  slub: add missed accounting
  slub: Extract get_freelist from __slab_alloc
  slub: Switch per cpu partial page support off for debugging
  slub: fix a possible memleak in __slab_alloc()
  slub: fix slub_max_order Documentation
  slub: add missed accounting
  slab: add taint flag outputting to debug paths.
  slub: add taint flag outputting to debug paths
  slab: introduce slab_max_order kernel parameter
  slab: rename slab_break_gfp_order to slab_max_order
Linus Torvalds, 2012-01-11 18:52:23 -08:00
commit 6296e5d3c0
4 changed files with 82 additions and 42 deletions

Documentation/kernel-parameters.txt

@@ -2395,6 +2395,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	slram=		[HW,MTD]
 
+	slab_max_order=	[MM, SLAB]
+			Determines the maximum allowed order for slabs.
+			A high setting may cause OOMs due to memory
+			fragmentation. Defaults to 1 for systems with
+			more than 32MB of RAM, 0 otherwise.
+
 	slub_debug[=options[,slabs]]	[MM, SLUB]
 			Enabling slub_debug allows one to determine the
 			culprit if slab objects become corrupted. Enabling

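For example (illustrative values only, not part of this change), a SLAB system hitting fragmentation-related OOMs could boot with

	slab_max_order=0

on the kernel command line to restrict slab caches to order-0 pages, or pick a larger value to allow bigger slabs at the cost of fragmentation.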
Documentation/vm/slub.txt

@@ -117,7 +117,7 @@ can be influenced by kernel parameters:
 
 slub_min_objects=x		(default 4)
 slub_min_order=x		(default 0)
-slub_max_order=x		(default 1)
+slub_max_order=x		(default 3 (PAGE_ALLOC_COSTLY_ORDER))
 
 slub_min_objects allows to specify how many objects must at least fit
 into one slab in order for the allocation order to be acceptable.

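As an illustrative combination (hypothetical values): booting with

	slub_max_order=1 slub_min_objects=16

asks SLUB to fit at least 16 objects per slab while never going above order-1 pages; broadly, the order cap takes precedence when the two conflict.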
mm/slab.c

@@ -481,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
 #endif
 
 /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
  */
-#define	BREAK_GFP_ORDER_HI	1
-#define	BREAK_GFP_ORDER_LO	0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define	SLAB_MAX_ORDER_HI	1
+#define	SLAB_MAX_ORDER_LO	0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
 
 /*
  * Functions for storing/retrieving the cachep and or slab from the page
@@ -854,6 +856,17 @@ static int __init noaliencache_setup(char *s)
 }
 __setup("noaliencache", noaliencache_setup);
 
+static int __init slab_max_order_setup(char *str)
+{
+	get_option(&str, &slab_max_order);
+	slab_max_order = slab_max_order < 0 ? 0 :
+				min(slab_max_order, MAX_ORDER - 1);
+	slab_max_order_set = true;
+
+	return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
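A worked example of the clamping above, assuming the common MAX_ORDER of 11 (valid orders 0-10): slab_max_order=13 is clamped to 10, a negative value falls back to 0, and slab_max_order=2 is used as given. Setting slab_max_order_set also suppresses the 32MB auto-tuning in kmem_cache_init() below.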
@@ -1502,10 +1515,11 @@ void __init kmem_cache_init(void)
 
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
-	 * page orders on machines with more than 32MB of memory.
+	 * page orders on machines with more than 32MB of memory if
+	 * not overridden on the command line.
 	 */
-	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+		slab_max_order = SLAB_MAX_ORDER_HI;
 
 	/* Bootstrap is tricky, because several objects are allocated
 	 * from caches that do not exist yet:
@@ -1932,8 +1946,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 			/* Print header */
 			if (lines == 0) {
 				printk(KERN_ERR
-					"Slab corruption: %s start=%p, len=%d\n",
-					cachep->name, realobj, size);
+					"Slab corruption (%s): %s start=%p, len=%d\n",
+					print_tainted(), cachep->name, realobj, size);
 				print_objinfo(cachep, objp, 0);
 			}
 			/* Hexdump the affected line */
@@ -2117,7 +2131,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (gfporder >= slab_break_gfp_order)
+		if (gfporder >= slab_max_order)
 			break;
 
 		/*
@@ -3042,8 +3056,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 	if (entries != cachep->num - slabp->inuse) {
 bad:
 		printk(KERN_ERR "slab: Internal list corruption detected in "
-				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-			cachep->name, cachep->num, slabp, slabp->inuse);
+				"cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+			cachep->name, cachep->num, slabp, slabp->inuse,
+			print_tainted());
 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
 			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
 			1);

mm/slub.c

@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 		}
 
 		if (l != m) {
-			if (l == M_PARTIAL)
+			if (l == M_PARTIAL) {
 				remove_partial(n, page);
-			else
+				stat(s, FREE_REMOVE_PARTIAL);
+			} else {
 				add_partial(n, page,
 					DEACTIVATE_TO_TAIL);
+				stat(s, FREE_ADD_PARTIAL);
+			}
 
 			l = m;
 		}
@@ -2123,6 +2126,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 	return object;
 }
 
+/*
+ * Check the page->freelist of a page and either transfer the freelist to the
+ * per cpu freelist or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
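The contract here mirrors the comment removed from __slab_alloc() below: if the page still holds free objects, the whole freelist is handed to the per-cpu allocator and the page stays frozen (new.frozen = freelist != NULL); if it is empty, the same cmpxchg clears the frozen bit, so the caller can simply drop its reference without touching the partial lists.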
@@ -2144,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2166,31 +2198,14 @@ redo:
 		goto new_slab;
 	}
 
+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
+
 	stat(s, ALLOC_SLOWPATH);
 
-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	object = get_freelist(s, c->page);
 
 	if (!object) {
 		c->page = NULL;
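Per the "slub: fix a possible memleak in __slab_alloc()" entry in this merge: on CONFIG_PREEMPT the task can migrate (or an interrupt can free objects) before irqs are disabled here, leaving c->freelist non-empty; without the re-check above, installing a freelist taken from the page would overwrite and orphan those objects.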
@@ -3028,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
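Per the "Switch per cpu partial page support off for debugging" entry: per-cpu partial pages make it hard to account for and validate all partial slabs, so debug caches get cpu_partial = 0 and stay on the fully checked paths.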
@@ -4637,6 +4654,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;
 
 	s->cpu_partial = objects;
 	flush_all(s);
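Observable effect (illustrative; the cache name is hypothetical and the cache must actually have debugging enabled, e.g. via slub_debug): a non-zero write to cpu_partial is now rejected, while 0 is still accepted:

	# echo 8 > /sys/kernel/slab/kmalloc-64/cpu_partial
	echo: write error: Invalid argument
	# echo 0 > /sys/kernel/slab/kmalloc-64/cpu_partial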