[PATCH] slab: fix handling of pages from foreign NUMA nodes

The NUMA slab allocator may place pages allocated from foreign nodes onto
the lists of a particular node if that node runs out of memory.  Inspecting
the slab->nodeid field then does not reveal that the page is now in use for
the slabs of another node.

This patch fixes that issue by adding a node parameter to free_block() so
that the caller can indicate which node currently uses a slab.

Also remove the check for the current node from kmem_cache_alloc_node(),
since the process may later shift to another node, which may lead to an
allocation on a node other than the intended one.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Christoph Lameter 2005-09-22 21:44:02 -07:00 committed by Linus Torvalds
parent 57487f4376
commit ff69416e63

View File

@ -640,7 +640,7 @@ static enum {
static DEFINE_PER_CPU(struct work_struct, reap_work); static DEFINE_PER_CPU(struct work_struct, reap_work);
static void free_block(kmem_cache_t* cachep, void** objpp, int len); static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node);
static void enable_cpucache (kmem_cache_t *cachep); static void enable_cpucache (kmem_cache_t *cachep);
static void cache_reap (void *unused); static void cache_reap (void *unused);
static int __node_shrink(kmem_cache_t *cachep, int node); static int __node_shrink(kmem_cache_t *cachep, int node);
@ -805,7 +805,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache
if (ac->avail) { if (ac->avail) {
spin_lock(&rl3->list_lock); spin_lock(&rl3->list_lock);
free_block(cachep, ac->entry, ac->avail); free_block(cachep, ac->entry, ac->avail, node);
ac->avail = 0; ac->avail = 0;
spin_unlock(&rl3->list_lock); spin_unlock(&rl3->list_lock);
} }
@ -926,7 +926,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
/* Free limit for this kmem_list3 */ /* Free limit for this kmem_list3 */
l3->free_limit -= cachep->batchcount; l3->free_limit -= cachep->batchcount;
if (nc) if (nc)
free_block(cachep, nc->entry, nc->avail); free_block(cachep, nc->entry, nc->avail, node);
if (!cpus_empty(mask)) { if (!cpus_empty(mask)) {
spin_unlock(&l3->list_lock); spin_unlock(&l3->list_lock);
@ -935,7 +935,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
if (l3->shared) { if (l3->shared) {
free_block(cachep, l3->shared->entry, free_block(cachep, l3->shared->entry,
l3->shared->avail); l3->shared->avail, node);
kfree(l3->shared); kfree(l3->shared);
l3->shared = NULL; l3->shared = NULL;
} }
@ -1883,12 +1883,13 @@ static void do_drain(void *arg)
{ {
kmem_cache_t *cachep = (kmem_cache_t*)arg; kmem_cache_t *cachep = (kmem_cache_t*)arg;
struct array_cache *ac; struct array_cache *ac;
int node = numa_node_id();
check_irq_off(); check_irq_off();
ac = ac_data(cachep); ac = ac_data(cachep);
spin_lock(&cachep->nodelists[numa_node_id()]->list_lock); spin_lock(&cachep->nodelists[node]->list_lock);
free_block(cachep, ac->entry, ac->avail); free_block(cachep, ac->entry, ac->avail, node);
spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock); spin_unlock(&cachep->nodelists[node]->list_lock);
ac->avail = 0; ac->avail = 0;
} }
@ -2609,7 +2610,7 @@ done:
/* /*
* Caller needs to acquire correct kmem_list's list_lock * Caller needs to acquire correct kmem_list's list_lock
*/ */
static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node)
{ {
int i; int i;
struct kmem_list3 *l3; struct kmem_list3 *l3;
@ -2618,14 +2619,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
void *objp = objpp[i]; void *objp = objpp[i];
struct slab *slabp; struct slab *slabp;
unsigned int objnr; unsigned int objnr;
int nodeid = 0;
slabp = GET_PAGE_SLAB(virt_to_page(objp)); slabp = GET_PAGE_SLAB(virt_to_page(objp));
nodeid = slabp->nodeid; l3 = cachep->nodelists[node];
l3 = cachep->nodelists[nodeid];
list_del(&slabp->list); list_del(&slabp->list);
objnr = (objp - slabp->s_mem) / cachep->objsize; objnr = (objp - slabp->s_mem) / cachep->objsize;
check_spinlock_acquired_node(cachep, nodeid); check_spinlock_acquired_node(cachep, node);
check_slabp(cachep, slabp); check_slabp(cachep, slabp);
@ -2665,13 +2664,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
{ {
int batchcount; int batchcount;
struct kmem_list3 *l3; struct kmem_list3 *l3;
int node = numa_node_id();
batchcount = ac->batchcount; batchcount = ac->batchcount;
#if DEBUG #if DEBUG
BUG_ON(!batchcount || batchcount > ac->avail); BUG_ON(!batchcount || batchcount > ac->avail);
#endif #endif
check_irq_off(); check_irq_off();
l3 = cachep->nodelists[numa_node_id()]; l3 = cachep->nodelists[node];
spin_lock(&l3->list_lock); spin_lock(&l3->list_lock);
if (l3->shared) { if (l3->shared) {
struct array_cache *shared_array = l3->shared; struct array_cache *shared_array = l3->shared;
@ -2687,7 +2687,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
} }
} }
free_block(cachep, ac->entry, batchcount); free_block(cachep, ac->entry, batchcount, node);
free_done: free_done:
#if STATS #if STATS
{ {
@ -2752,7 +2752,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
} else { } else {
spin_lock(&(cachep->nodelists[nodeid])-> spin_lock(&(cachep->nodelists[nodeid])->
list_lock); list_lock);
free_block(cachep, &objp, 1); free_block(cachep, &objp, 1, nodeid);
spin_unlock(&(cachep->nodelists[nodeid])-> spin_unlock(&(cachep->nodelists[nodeid])->
list_lock); list_lock);
} }
@ -2845,7 +2845,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i
unsigned long save_flags; unsigned long save_flags;
void *ptr; void *ptr;
if (nodeid == numa_node_id() || nodeid == -1) if (nodeid == -1)
return __cache_alloc(cachep, flags); return __cache_alloc(cachep, flags);
if (unlikely(!cachep->nodelists[nodeid])) { if (unlikely(!cachep->nodelists[nodeid])) {
@ -3080,7 +3080,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep)
if ((nc = cachep->nodelists[node]->shared)) if ((nc = cachep->nodelists[node]->shared))
free_block(cachep, nc->entry, free_block(cachep, nc->entry,
nc->avail); nc->avail, node);
l3->shared = new; l3->shared = new;
if (!cachep->nodelists[node]->alien) { if (!cachep->nodelists[node]->alien) {
@ -3161,7 +3161,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
if (!ccold) if (!ccold)
continue; continue;
spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail); free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
kfree(ccold); kfree(ccold);
} }
@ -3241,7 +3241,7 @@ static void drain_array_locked(kmem_cache_t *cachep,
if (tofree > ac->avail) { if (tofree > ac->avail) {
tofree = (ac->avail+1)/2; tofree = (ac->avail+1)/2;
} }
free_block(cachep, ac->entry, tofree); free_block(cachep, ac->entry, tofree, node);
ac->avail -= tofree; ac->avail -= tofree;
memmove(ac->entry, &(ac->entry[tofree]), memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void*)*ac->avail); sizeof(void*)*ac->avail);