SLUB: Alternate fast paths using cmpxchg_local

Provide an alternate implementation of the SLUB fast paths for alloc and free using cmpxchg_local. The cmpxchg_local fast path is selected for arches that have CONFIG_FAST_CMPXCHG_LOCAL set. An arch should only set CONFIG_FAST_CMPXCHG_LOCAL if the cmpxchg_local is faster than an interrupt enable/disable sequence. This is known to be true for both x86 platforms so set FAST_CMPXCHG_LOCAL for both arches. Currently another requirement for the fastpath is that the kernel is compiled without preemption. The restriction will go away with the introduction of a new per cpu allocator and new per cpu operations. The advantages of a cmpxchg_local based fast path are: 1. Potentially lower cycle count (30%-60% faster) 2. There is no need to disable and enable interrupts on the fast path. Currently interrupts have to be disabled and enabled on every slab operation. This is likely avoiding a significant percentage of interrupt off / on sequences in the kernel. 3. The disposal of freed slabs can occur with interrupts enabled. The alternate path is realized using #ifdef's. Several attempts to do the same with macros and inline functions resulted in a mess (in particular due to the strange way that local_interrupt_save() handles its argument and due to the need to define macros/functions that sometimes disable interrupts and sometimes do something else). [clameter: Stripped preempt bits and disabled fastpath if preempt is enabled] Signed-off-by: Christoph Lameter <clameter@sgi.com> Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi> Cc: <linux-arch@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2008-01-07 23:20:30 -08:00 · 2008-01-07 23:20:30 -08:00 · 1f84260c8c
commit 1f84260c8c
parent 683d0baad3
2 changed files with 92 additions and 5 deletions
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@ -52,6 +52,10 @@ config HAVE_LATENCYTOP_SUPPORT
 config SEMAPHORE_SLEEPERS
 	def_bool y

+config FAST_CMPXCHG_LOCAL
+	bool
+	default y
+
 config MMU
 	def_bool y

--- a/mm/slub.c
+++ b/mm/slub.c
@ -149,6 +149,13 @@ static inline void ClearSlabDebug(struct page *page)
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST

+/*
+ * Currently fastpath is not supported if preemption is enabled.
+ */
+#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
+#define SLUB_FASTPATH
+#endif
+
 #if PAGE_SHIFT <= 12

 /*
@ -1493,7 +1500,11 @@ static void *__slab_alloc(struct kmem_cache *s,
 {
 	void **object;
 	struct page *new;
+#ifdef SLUB_FASTPATH
+	unsigned long flags;

+	local_irq_save(flags);
+#endif
 	if (!c->page)
 		goto new_slab;

@ -1512,7 +1523,12 @@ load_freelist:
 	c->page->inuse = s->objects;
 	c->page->freelist = c->page->end;
 	c->node = page_to_nid(c->page);
+unlock_out:
 	slab_unlock(c->page);
+out:
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return object;

 another_slab:
@ -1542,7 +1558,8 @@ new_slab:
 		c->page = new;
 		goto load_freelist;
 	}
-	return NULL;
+	object = NULL;
+	goto out;
 debug:
 	object = c->page->freelist;
 	if (!alloc_debug_processing(s, c->page, object, addr))
@ -1551,8 +1568,7 @@ debug:
 	c->page->inuse++;
 	c->page->freelist = object[c->offset];
 	c->node = -1;
-	slab_unlock(c->page);
-	return object;
+	goto unlock_out;
 }

 /*
@ -1569,9 +1585,36 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, void *addr)
 {
 	void **object;
-	unsigned long flags;
 	struct kmem_cache_cpu *c;

+/*
+ * The SLUB_FASTPATH path is provisional and is currently disabled if the
+ * kernel is compiled with preemption or if the arch does not support
+ * fast cmpxchg operations. There are a couple of coming changes that will
+ * simplify matters and allow preemption. Ultimately we may end up making
+ * SLUB_FASTPATH the default.
+ *
+ * 1. The introduction of the per cpu allocator will avoid array lookups
+ *    through get_cpu_slab(). A special register can be used instead.
+ *
+ * 2. The introduction of per cpu atomic operations (cpu_ops) means that
+ *    we can realize the logic here entirely with per cpu atomics. The
+ *    per cpu atomic ops will take care of the preemption issues.
+ */
+
+#ifdef SLUB_FASTPATH
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	do {
+		object = c->freelist;
+		if (unlikely(is_end(object) || !node_match(c, node))) {
+			object = __slab_alloc(s, gfpflags, node, addr, c);
+			break;
+		}
+	} while (cmpxchg_local(&c->freelist, object, object[c->offset])
+								!= object);
+#else
+	unsigned long flags;
+
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 	if (unlikely(is_end(c->freelist) || !node_match(c, node)))
@ -1583,6 +1626,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 		c->freelist = object[c->offset];
 	}
 	local_irq_restore(flags);
+#endif

 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, c->objsize);
@ -1618,6 +1662,11 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	void *prior;
 	void **object = (void *)x;

+#ifdef SLUB_FASTPATH
+	unsigned long flags;
+
+	local_irq_save(flags);
+#endif
 	slab_lock(page);

 	if (unlikely(SlabDebug(page)))
@ -1643,6 +1692,9 @@ checks_ok:

 out_unlock:
 	slab_unlock(page);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return;

 slab_empty:
@ -1653,6 +1705,9 @@ slab_empty:
 		remove_partial(s, page);

 	slab_unlock(page);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	discard_slab(s, page);
 	return;

@ -1677,9 +1732,36 @@ static __always_inline void slab_free(struct kmem_cache *s,
 			struct page *page, void *x, void *addr)
 {
 	void **object = (void *)x;
-	unsigned long flags;
 	struct kmem_cache_cpu *c;

+#ifdef SLUB_FASTPATH
+	void **freelist;
+
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	debug_check_no_locks_freed(object, s->objsize);
+	do {
+		freelist = c->freelist;
+		barrier();
+		/*
+		 * If the compiler would reorder the retrieval of c->page to
+		 * come before c->freelist then an interrupt could
+		 * change the cpu slab before we retrieve c->freelist. We
+		 * could be matching on a page no longer active and put the
+		 * object onto the freelist of the wrong slab.
+		 *
+		 * On the other hand: If we already have the freelist pointer
+		 * then any change of cpu_slab will cause the cmpxchg to fail
+		 * since the freelist pointers are unique per slab.
+		 */
+		if (unlikely(page != c->page || c->node < 0)) {
+			__slab_free(s, page, x, addr, c->offset);
+			break;
+		}
+		object[c->offset] = freelist;
+	} while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
+#else
+	unsigned long flags;
+
 	local_irq_save(flags);
 	debug_check_no_locks_freed(object, s->objsize);
 	c = get_cpu_slab(s, smp_processor_id());
@ -1690,6 +1772,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 		__slab_free(s, page, x, addr, c->offset);

 	local_irq_restore(flags);
+#endif
 }

 void kmem_cache_free(struct kmem_cache *s, void *x)