linux/lib/percpu_counter.c
Andrew Morton 71c5576fbd revert "percpu counter: clean up percpu_counter_sum_and_set()"
Revert

    commit 1f7c14c62c
    Author: Mingming Cao <cmm@us.ibm.com>
    Date:   Thu Oct 9 12:50:59 2008 -0400

        percpu counter: clean up percpu_counter_sum_and_set()

Before this patch we had the following:

percpu_counter_sum(): return the percpu_counter's value

percpu_counter_sum_and_set(): return the percpu_counter's value, copying
that value into the central value and zeroing the per-cpu counters before
returning.

After this patch, percpu_counter_sum_and_set() has gone, and
percpu_counter_sum() gets the old percpu_counter_sum_and_set()
functionality.

Problem is, as Eric points out, the old percpu_counter_sum_and_set()
functionality was racy and wrong.  It zeroes out counters on "other" cpus,
without holding any locks which will prevent races agaist updates from
those other CPUS.

This patch reverts 1f7c14c62c.  This means
that percpu_counter_sum_and_set() still has the race, but
percpu_counter_sum() does not.

Note that this is not a simple revert - ext4 has since started using
percpu_counter_sum() for its dirty_blocks counter as well.

Note that this revert patch changes percpu_counter_sum() semantics.

Before the patch, a call to percpu_counter_sum() will bring the counter's
central counter mostly up-to-date, so a following percpu_counter_read()
will return a close value.

After this patch, a call to percpu_counter_sum() will leave the counter's
central accumulator unaltered, so a subsequent call to
percpu_counter_read() can now return a significantly inaccurate result.

If there is any code in the tree which was introduced after
e8ced39d5e was merged, and which depends
upon the new percpu_counter_sum() semantics, that code will break.

Reported-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mingming Cao <cmm@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-12-10 08:01:52 -08:00

152 lines
3.2 KiB
C

/*
* Fast batching percpu counters.
*/
#include <linux/percpu_counter.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/module.h>
#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(percpu_counters);
static DEFINE_MUTEX(percpu_counters_lock);
#endif
void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
int cpu;
spin_lock(&fbc->lock);
for_each_possible_cpu(cpu) {
s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
*pcount = 0;
}
fbc->count = amount;
spin_unlock(&fbc->lock);
}
EXPORT_SYMBOL(percpu_counter_set);
void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
{
s64 count;
s32 *pcount;
int cpu = get_cpu();
pcount = per_cpu_ptr(fbc->counters, cpu);
count = *pcount + amount;
if (count >= batch || count <= -batch) {
spin_lock(&fbc->lock);
fbc->count += count;
*pcount = 0;
spin_unlock(&fbc->lock);
} else {
*pcount = count;
}
put_cpu();
}
EXPORT_SYMBOL(__percpu_counter_add);
/*
* Add up all the per-cpu counts, return the result. This is a more accurate
* but much slower version of percpu_counter_read_positive()
*/
s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
{
s64 ret;
int cpu;
spin_lock(&fbc->lock);
ret = fbc->count;
for_each_online_cpu(cpu) {
s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
ret += *pcount;
if (set)
*pcount = 0;
}
if (set)
fbc->count = ret;
spin_unlock(&fbc->lock);
return ret;
}
EXPORT_SYMBOL(__percpu_counter_sum);
static struct lock_class_key percpu_counter_irqsafe;
int percpu_counter_init(struct percpu_counter *fbc, s64 amount)
{
spin_lock_init(&fbc->lock);
fbc->count = amount;
fbc->counters = alloc_percpu(s32);
if (!fbc->counters)
return -ENOMEM;
#ifdef CONFIG_HOTPLUG_CPU
mutex_lock(&percpu_counters_lock);
list_add(&fbc->list, &percpu_counters);
mutex_unlock(&percpu_counters_lock);
#endif
return 0;
}
EXPORT_SYMBOL(percpu_counter_init);
int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount)
{
int err;
err = percpu_counter_init(fbc, amount);
if (!err)
lockdep_set_class(&fbc->lock, &percpu_counter_irqsafe);
return err;
}
void percpu_counter_destroy(struct percpu_counter *fbc)
{
if (!fbc->counters)
return;
#ifdef CONFIG_HOTPLUG_CPU
mutex_lock(&percpu_counters_lock);
list_del(&fbc->list);
mutex_unlock(&percpu_counters_lock);
#endif
free_percpu(fbc->counters);
fbc->counters = NULL;
}
EXPORT_SYMBOL(percpu_counter_destroy);
#ifdef CONFIG_HOTPLUG_CPU
static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned long action, void *hcpu)
{
unsigned int cpu;
struct percpu_counter *fbc;
if (action != CPU_DEAD)
return NOTIFY_OK;
cpu = (unsigned long)hcpu;
mutex_lock(&percpu_counters_lock);
list_for_each_entry(fbc, &percpu_counters, list) {
s32 *pcount;
unsigned long flags;
spin_lock_irqsave(&fbc->lock, flags);
pcount = per_cpu_ptr(fbc->counters, cpu);
fbc->count += *pcount;
*pcount = 0;
spin_unlock_irqrestore(&fbc->lock, flags);
}
mutex_unlock(&percpu_counters_lock);
return NOTIFY_OK;
}
static int __init percpu_counter_startup(void)
{
hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
return 0;
}
module_init(percpu_counter_startup);
#endif