From 6faa68337b0c90923a1405ae9c196cee64921b7e Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:51 -0500
Subject: [PATCH 1/9] slub: Use freelist instead of "object" in __slab_alloc

The variable "object" really refers to a list of objects that we are
handling.

Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 80848cd3901c..83f258298de7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2127,7 +2127,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
 static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
			int node, struct kmem_cache_cpu **pc)
 {
-	void *object;
+	void *freelist;
	struct kmem_cache_cpu *c;
	struct page *page = new_slab(s, flags, node);
 
@@ -2140,7 +2140,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
		 * No other reference to the page yet so we can
		 * muck around with it freely without cmpxchg
		 */
-		object = page->freelist;
+		freelist = page->freelist;
		page->freelist = NULL;
 
		stat(s, ALLOC_SLAB);
@@ -2148,9 +2148,9 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
		c->page = page;
		*pc = c;
	} else
-		object = NULL;
+		freelist = NULL;
 
-	return object;
+	return freelist;
 }
 
 /*
@@ -2170,6 +2170,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
	do {
		freelist = page->freelist;
		counters = page->counters;
+		new.counters = counters;
 
		VM_BUG_ON(!new.frozen);
 
@@ -2203,7 +2204,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
 {
-	void **object;
+	void *freelist;
	unsigned long flags;
 
	local_irq_save(flags);
@@ -2219,6 +2220,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	if (!c->page)
		goto new_slab;
 redo:
+
	if (unlikely(!node_match(c, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, c);
@@ -2226,15 +2228,15 @@ redo:
	}
 
	/* must check again c->freelist in case of cpu migration or IRQ */
-	object = c->freelist;
-	if (object)
+	freelist = c->freelist;
+	if (freelist)
		goto load_freelist;
 
	stat(s, ALLOC_SLOWPATH);
 
-	object = get_freelist(s, c->page);
+	freelist = get_freelist(s, c->page);
 
-	if (!object) {
+	if (!freelist) {
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
@@ -2243,10 +2245,10 @@ redo:
	stat(s, ALLOC_REFILL);
 
 load_freelist:
-	c->freelist = get_freepointer(s, object);
+	c->freelist = get_freepointer(s, freelist);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
-	return object;
+	return freelist;
 
 new_slab:
 
@@ -2260,13 +2262,13 @@ new_slab:
	}
 
	/* Then do expensive stuff like retrieving pages from the partial lists */
-	object = get_partial(s, gfpflags, node, c);
+	freelist = get_partial(s, gfpflags, node, c);
 
-	if (unlikely(!object)) {
+	if (unlikely(!freelist)) {
 
-		object = new_slab_objects(s, gfpflags, node, &c);
+		freelist = new_slab_objects(s, gfpflags, node, &c);
 
-		if (unlikely(!object)) {
+		if (unlikely(!freelist)) {
			if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
				slab_out_of_memory(s, gfpflags, node);
 
@@ -2279,14 +2281,14 @@ new_slab:
		goto load_freelist;
 
	/* Only entered in the debug case */
-	if (!alloc_debug_processing(s, c->page, object, addr))
+	if (!alloc_debug_processing(s, c->page, freelist, addr))
		goto new_slab;	/* Slab failed checks. Next slab needed */
 
-	c->freelist = get_freepointer(s, object);
+	c->freelist = get_freepointer(s, freelist);
	deactivate_slab(s, c);
	c->node = NUMA_NO_NODE;
	local_irq_restore(flags);
-	return object;
+	return freelist;
 }
 
 /*

From 507effeaba29bf724dfe38317fbd11d0fe25fa40 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:52 -0500
Subject: [PATCH 2/9] slub: Add frozen check in __slab_alloc

Verify that objects returned from __slab_alloc come from slab pages
in the correct state.

Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index 83f258298de7..a3395c28f561 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2245,6 +2245,12 @@ redo:
	stat(s, ALLOC_REFILL);
 
 load_freelist:
+	/*
+	 * freelist is pointing to the list of objects to be used.
+	 * page is pointing to the page from which the objects are obtained.
+	 * That page must be frozen for per cpu allocations to work.
+	 */
+	VM_BUG_ON(!c->page->frozen);
	c->freelist = get_freepointer(s, freelist);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);

From 7ced3719719669ad6bd279b45fa3c1a517b2e057 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:53 -0500
Subject: [PATCH 3/9] slub: Acquire_slab() avoid loop

Avoid the loop in acquire_slab() and simply fail if there is a
conflict. This will cause the next page on the list to be considered.

Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index a3395c28f561..9892775349bf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1490,12 +1490,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
 }
 
 /*
- * Lock slab, remove from the partial list and put the object into the
- * per cpu freelist.
+ * Remove slab from the partial list, freeze it and
+ * return the pointer to the freelist.
  *
  * Returns a list of objects or NULL if it fails.
  *
- * Must hold list_lock.
+ * Must hold list_lock since we modify the partial list.
  */
 static inline void *acquire_slab(struct kmem_cache *s,
		struct kmem_cache_node *n, struct page *page,
@@ -1510,22 +1510,24 @@ static inline void *acquire_slab(struct kmem_cache *s,
	 * The old freelist is the list of objects for the
	 * per cpu allocation list.
	 */
-	do {
-		freelist = page->freelist;
-		counters = page->counters;
-		new.counters = counters;
-		if (mode)
-			new.inuse = page->objects;
+	freelist = page->freelist;
+	counters = page->counters;
+	new.counters = counters;
+	if (mode)
+		new.inuse = page->objects;
 
-		VM_BUG_ON(new.frozen);
-		new.frozen = 1;
+	VM_BUG_ON(new.frozen);
+	new.frozen = 1;
 
-	} while (!__cmpxchg_double_slab(s, page,
+	if (!__cmpxchg_double_slab(s, page,
			freelist, counters,
			NULL, new.counters,
-			"lock and freeze"));
+			"acquire_slab"))
+		return NULL;
 
	remove_partial(n, page);
+	WARN_ON(!freelist);
	return freelist;
 }

From f469743673ceda5181970eb6b8090ba728c956fb Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:54 -0500
Subject: [PATCH 4/9] slub: Simplify control flow in __slab_alloc()

Simplify control flow a bit, avoiding nesting.
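
To make the intent concrete, the control-flow shape before and after is
roughly the following self-contained C sketch (the stub functions and
main() are illustrative stand-ins for get_partial()/new_slab_objects(),
not the kernel code):

  #include <stdio.h>
  #include <stdlib.h>

  /* Stand-ins for get_partial() and new_slab_objects(). */
  static void *get_partial_stub(void) { return NULL; }
  static void *new_slab_stub(void)    { return malloc(64); }

  static void *alloc_sketch(void)
  {
  	void *freelist;

  	/* Each fallback is a flat step instead of a nested block. */
  	freelist = get_partial_stub();
  	if (!freelist)
  		freelist = new_slab_stub();

  	/* One failure path now covers both attempts. */
  	if (!freelist) {
  		fprintf(stderr, "out of memory\n");
  		return NULL;
  	}
  	return freelist;
  }

  int main(void)
  {
  	void *p = alloc_sketch();

  	printf("allocated: %p\n", p);
  	free(p);
  	return 0;
  }

The second attempt and the failure handling no longer nest inside the
first attempt's error branch.
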
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 9892775349bf..5aacd434e2cb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2272,17 +2272,15 @@ new_slab:
	/* Then do expensive stuff like retrieving pages from the partial lists */
	freelist = get_partial(s, gfpflags, node, c);
 
-	if (unlikely(!freelist)) {
-
+	if (!freelist)
		freelist = new_slab_objects(s, gfpflags, node, &c);
 
-		if (unlikely(!freelist)) {
-			if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
-				slab_out_of_memory(s, gfpflags, node);
+	if (unlikely(!freelist)) {
+		if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+			slab_out_of_memory(s, gfpflags, node);
 
-			local_irq_restore(flags);
-			return NULL;
-		}
+		local_irq_restore(flags);
+		return NULL;
	}

From 188fd063208942a4681d8e8a4484ad0d4ae0fda1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:55 -0500
Subject: [PATCH 5/9] slub: new_slab_objects() can also get objects from partial list

Moving the attempt to get a slab page from the partial lists into
new_slab_objects() simplifies __slab_alloc(), which is rather
complicated.

Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 5aacd434e2cb..b29246bc7392 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2130,9 +2130,15 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
			int node, struct kmem_cache_cpu **pc)
 {
	void *freelist;
-	struct kmem_cache_cpu *c;
-	struct page *page = new_slab(s, flags, node);
+	struct kmem_cache_cpu *c = *pc;
+	struct page *page;
 
+	freelist = get_partial(s, flags, node, c);
+
+	if (freelist)
+		return freelist;
+
+	page = new_slab(s, flags, node);
	if (page) {
		c = __this_cpu_ptr(s->cpu_slab);
		if (c->page)
@@ -2269,11 +2275,7 @@ new_slab:
		goto redo;
	}
 
-	/* Then do expensive stuff like retrieving pages from the partial lists */
-	freelist = get_partial(s, gfpflags, node, c);
-
-	if (!freelist)
-		freelist = new_slab_objects(s, gfpflags, node, &c);
+	freelist = new_slab_objects(s, gfpflags, node, &c);
 
	if (unlikely(!freelist)) {
		if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())

From ec3ab083a7a004282ee374bdaeb0aa603521b8eb Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:56 -0500
Subject: [PATCH 6/9] slub: Get rid of the node field

The node field is always page_to_nid(c->page). So it's rather easy to
replace. Note that there may be slightly more overhead in various hot
paths due to the need to shift the bits from page->flags. However,
that is mostly compensated for by a smaller footprint of the
kmem_cache_cpu structure (this patch reduces it to 3 words per cache),
which allows better caching.
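
The trade-off in miniature, as a self-contained C sketch (the struct
layout and NODE_SHIFT value are assumed for illustration only; in the
kernel the lookup is page_to_nid(), which extracts the node id from
page->flags):

  #include <assert.h>

  /* Model only: pretend the node id sits in the upper page flag bits. */
  #define NODE_SHIFT 28

  struct page { unsigned long flags; };

  static int page_to_node(const struct page *page)
  {
  	return (int)(page->flags >> NODE_SHIFT);	/* the extra shift in hot paths */
  }

  struct cpu_slab {
  	void *freelist;
  	unsigned long tid;
  	struct page *page;
  	/* int node; -- dropped: it always equals page_to_node(page) */
  };

  int main(void)
  {
  	struct page p = { .flags = 3UL << NODE_SHIFT };
  	struct cpu_slab c = { .page = &p };

  	assert(page_to_node(c.page) == 3);	/* recomputed on demand */
  	return 0;
  }
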
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 include/linux/slub_def.h |  1 -
 mm/slub.c                | 35 ++++++++++++++++-------------------
 2 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index c2f8c8bc56ed..ebdcf4ba42ee 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -48,7 +48,6 @@ struct kmem_cache_cpu {
	unsigned long tid;	/* Globally unique transaction id */
	struct page *page;	/* The slab from which we are allocating */
	struct page *partial;	/* Partially allocated frozen slabs */
-	int node;		/* The node of the page (or -1 for debug) */
 #ifdef CONFIG_SLUB_STATS
	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index b29246bc7392..aed879276410 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1561,7 +1561,6 @@ static void *get_partial_node(struct kmem_cache *s,
 
		if (!object) {
			c->page = page;
-			c->node = page_to_nid(page);
			stat(s, ALLOC_FROM_PARTIAL);
			object = t;
			available = page->objects - page->inuse;
@@ -2057,7 +2056,7 @@ static void flush_all(struct kmem_cache *s)
 static inline int node_match(struct kmem_cache_cpu *c, int node)
 {
 #ifdef CONFIG_NUMA
-	if (node != NUMA_NO_NODE && c->node != node)
+	if (node != NUMA_NO_NODE && page_to_nid(c->page) != node)
		return 0;
 #endif
	return 1;
@@ -2152,7 +2151,6 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
		page->freelist = NULL;
 
		stat(s, ALLOC_SLAB);
-		c->node = page_to_nid(page);
		c->page = page;
		*pc = c;
	} else
@@ -2269,7 +2267,6 @@ new_slab:
	if (c->partial) {
		c->page = c->partial;
		c->partial = c->page->next;
-		c->node = page_to_nid(c->page);
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
@@ -2294,7 +2291,6 @@ new_slab:
 
	c->freelist = get_freepointer(s, freelist);
	deactivate_slab(s, c);
-	c->node = NUMA_NO_NODE;
	local_irq_restore(flags);
	return freelist;
 }
@@ -4507,30 +4503,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
-			int node = ACCESS_ONCE(c->node);
+			int node;
			struct page *page;
 
-			if (node < 0)
-				continue;
			page = ACCESS_ONCE(c->page);
-			if (page) {
-				if (flags & SO_TOTAL)
-					x = page->objects;
-				else if (flags & SO_OBJECTS)
-					x = page->inuse;
-				else
-					x = 1;
+			if (!page)
+				continue;
 
-				total += x;
-				nodes[node] += x;
-			}
-			page = c->partial;
+			node = page_to_nid(page);
+			if (flags & SO_TOTAL)
+				x = page->objects;
+			else if (flags & SO_OBJECTS)
+				x = page->inuse;
+			else
+				x = 1;
 
+			total += x;
+			nodes[node] += x;
+
+			page = ACCESS_ONCE(c->partial);
			if (page) {
				x = page->pobjects;
				total += x;
				nodes[node] += x;
			}
+
+			per_cpu[node]++;
		}
	}

From c17dda40a6a4ed95f035db38b7ba4fab0d99da44 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:57 -0500
Subject: [PATCH 7/9] slub: Separate out kmem_cache_cpu processing from deactivate_slab

Processing of the kmem_cache_cpu fields is cleaner if the code working
on those fields is taken out of deactivate_slab().
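
Schematically, the split looks like this self-contained C sketch (names
are illustrative; the real deactivate_slab() returns the freelist to the
page under cmpxchg, which is elided here):

  /* The helper now takes exactly the state it operates on... */
  struct page;

  struct cpu_slab {
  	void *freelist;
  	unsigned long tid;
  	struct page *page;
  };

  static void deactivate_sketch(struct page *page, void *freelist)
  {
  	/* ...and never touches the per cpu structure. */
  	(void)page;
  	(void)freelist;
  }

  /* The caller owns every kmem_cache_cpu transition. */
  static void flush_sketch(struct cpu_slab *c)
  {
  	deactivate_sketch(c->page, c->freelist);
  	c->tid++;		/* stand-in for next_tid() */
  	c->page = NULL;
  	c->freelist = NULL;
  }

  int main(void)
  {
  	struct cpu_slab c = { 0 };

  	flush_sketch(&c);
  	return 0;
  }
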
Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index aed879276410..2389a016577e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1729,14 +1729,12 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
 /*
  * Remove the cpu slab
  */
-static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
+static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
 {
	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
-	struct page *page = c->page;
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
	int lock = 0;
	enum slab_modes l = M_NONE, m = M_NONE;
-	void *freelist;
	void *nextfree;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
@@ -1747,11 +1745,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
		tail = DEACTIVATE_TO_TAIL;
	}
 
-	c->tid = next_tid(c->tid);
-	c->page = NULL;
-	freelist = c->freelist;
-	c->freelist = NULL;
-
	/*
	 * Stage one: Free all available per cpu objects back
	 * to the page freelist while it is still frozen. Leave the
@@ -2009,7 +2002,11 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
	stat(s, CPUSLAB_FLUSH);
-	deactivate_slab(s, c);
+	deactivate_slab(s, c->page, c->freelist);
+
+	c->tid = next_tid(c->tid);
+	c->page = NULL;
+	c->freelist = NULL;
 }
 
 /*
@@ -2229,7 +2226,9 @@ redo:
 
	if (unlikely(!node_match(c, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
-		deactivate_slab(s, c);
+		deactivate_slab(s, c->page, c->freelist);
+		c->page = NULL;
+		c->freelist = NULL;
		goto new_slab;
	}
 
@@ -2289,8 +2288,9 @@ new_slab:
	if (!alloc_debug_processing(s, c->page, freelist, addr))
		goto new_slab;	/* Slab failed checks. Next slab needed */
 
-	c->freelist = get_freepointer(s, freelist);
-	deactivate_slab(s, c);
+	deactivate_slab(s, c->page, get_freepointer(s, freelist));
+	c->page = NULL;
+	c->freelist = NULL;
	local_irq_restore(flags);
	return freelist;
 }

From f6e7def7f7d749759e4bf36dcc25ae289a20d868 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:58 -0500
Subject: [PATCH 8/9] slub: Use page variable instead of c->page.

Store the value of c->page in a local variable to avoid additional
fetches from per cpu data.
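
The pattern, as a tiny self-contained C sketch (illustrative names; the
point is the single load of c->page into a local, which later uses then
share):

  struct page { int nid; };

  struct cpu_slab {
  	void *freelist;
  	struct page *page;	/* per cpu field: every c->page use is a refetch */
  };

  static int slab_nid_sketch(struct cpu_slab *c)
  {
  	struct page *page = c->page;	/* fetch from per cpu data once */

  	if (!page)
  		return -1;
  	return page->nid;		/* subsequent uses hit the local */
  }

  int main(void)
  {
  	struct page p = { 2 };
  	struct cpu_slab c = { 0, &p };

  	return slab_nid_sketch(&c) == 2 ? 0 : 1;
  }
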
Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 2389a016577e..6b60fc907a71 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2208,6 +2208,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
 {
	void *freelist;
+	struct page *page;
	unsigned long flags;
 
	local_irq_save(flags);
@@ -2220,13 +2221,14 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
	c = this_cpu_ptr(s->cpu_slab);
 #endif
 
-	if (!c->page)
+	page = c->page;
+	if (!page)
		goto new_slab;
 redo:
 
	if (unlikely(!node_match(c, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
-		deactivate_slab(s, c->page, c->freelist);
+		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
@@ -2239,7 +2241,7 @@ redo:
 
	stat(s, ALLOC_SLOWPATH);
 
-	freelist = get_freelist(s, c->page);
+	freelist = get_freelist(s, page);
 
	if (!freelist) {
		c->page = NULL;
@@ -2264,8 +2266,8 @@ load_freelist:
 new_slab:
 
	if (c->partial) {
-		c->page = c->partial;
-		c->partial = c->page->next;
+		page = c->page = c->partial;
+		c->partial = page->next;
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
@@ -2281,14 +2283,15 @@ new_slab:
		return NULL;
	}
 
+	page = c->page;
	if (likely(!kmem_cache_debug(s)))
		goto load_freelist;
 
	/* Only entered in the debug case */
-	if (!alloc_debug_processing(s, c->page, freelist, addr))
+	if (!alloc_debug_processing(s, page, freelist, addr))
		goto new_slab;	/* Slab failed checks. Next slab needed */
 
-	deactivate_slab(s, c->page, get_freepointer(s, freelist));
+	deactivate_slab(s, page, get_freepointer(s, freelist));
	c->page = NULL;
	c->freelist = NULL;
	local_irq_restore(flags);

From 57d437d2aa680f42d75cef45205834d5f605550a Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 9 May 2012 10:09:59 -0500
Subject: [PATCH 9/9] slub: pass page to node_match() instead of kmem_cache_cpu structure

Avoid passing the kmem_cache_cpu pointer to node_match. This makes the
node_match function more generic and easier to understand.

Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 6b60fc907a71..719509eaa467 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2050,10 +2050,10 @@ static void flush_all(struct kmem_cache *s)
  * Check if the objects in a per cpu structure fit numa
  * locality expectations.
  */
-static inline int node_match(struct kmem_cache_cpu *c, int node)
+static inline int node_match(struct page *page, int node)
 {
 #ifdef CONFIG_NUMA
-	if (node != NUMA_NO_NODE && page_to_nid(c->page) != node)
+	if (node != NUMA_NO_NODE && page_to_nid(page) != node)
		return 0;
 #endif
	return 1;
@@ -2226,7 +2226,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
		goto new_slab;
 redo:
 
-	if (unlikely(!node_match(c, node))) {
+	if (unlikely(!node_match(page, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
@@ -2313,6 +2313,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
	void **object;
	struct kmem_cache_cpu *c;
+	struct page *page;
	unsigned long tid;
 
	if (slab_pre_alloc_hook(s, gfpflags))
@@ -2338,7 +2339,8 @@ redo:
	barrier();
 
	object = c->freelist;
-	if (unlikely(!object || !node_match(c, node)))
+	page = c->page;
+	if (unlikely(!object || !node_match(page, node)))
		object = __slab_alloc(s, gfpflags, node, addr, c);