netfilter: revised locking for x_tables
The x_tables are organized with a table structure and per-cpu copies of the counters and rules. On older kernels there was a reader/writer lock per table which was a performance bottleneck. In 2.6.30-rc, this was converted to use RCU for the counters/rules, which solved the performance problems for do_table but made replacing rules much slower because of the necessary RCU grace period.

This version uses a per-cpu set of spinlocks and counters to allow table processing to proceed without the cache thrashing of a global reader lock, and keeps the same performance for table updates.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent bf368e4e70
commit 942e4a2bd6
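To make the locking scheme concrete before the diff: the per-cpu reader count plus spinlock can be modeled in plain user-space C. This is a minimal sketch, not kernel code. NCPU, the pthread mutexes, and the single-threaded main() are stand-ins for the kernel's per-cpu variables, spinlocks, and bottom-half-disabled packet and ioctl paths; the patch itself defines the real primitives as xt_info_rdlock_bh(), xt_info_rdunlock_bh(), xt_info_wrlock() and xt_info_wrunlock().

/*
 * Minimal user-space model of the x_tables per-cpu lock -- illustrative
 * only. In the kernel, "no async re-entry" is guaranteed by disabling
 * bottom halves, not by running single-threaded as this demo does.
 */
#include <pthread.h>
#include <stdio.h>

#define NCPU 4	/* stand-in for the machine's possible CPUs */

struct xt_info_lock {
	pthread_mutex_t lock;
	unsigned char readers;	/* recursion count for this "cpu" */
};

static struct xt_info_lock info_locks[NCPU];

/* Reader side: cheap, recursive, only ever touches this cpu's lock. */
static void xt_info_rdlock(int cpu)
{
	struct xt_info_lock *l = &info_locks[cpu];

	if (!l->readers++)
		pthread_mutex_lock(&l->lock);	/* first entry takes the lock */
}

static void xt_info_rdunlock(int cpu)
{
	struct xt_info_lock *l = &info_locks[cpu];

	if (!--l->readers)
		pthread_mutex_unlock(&l->lock);	/* last exit releases it */
}

/* Writer side: grab one cpu's lock, excluding that cpu's readers. */
static void xt_info_wrlock(int cpu)
{
	pthread_mutex_lock(&info_locks[cpu].lock);
}

static void xt_info_wrunlock(int cpu)
{
	pthread_mutex_unlock(&info_locks[cpu].lock);
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NCPU; cpu++) {
		pthread_mutex_init(&info_locks[cpu].lock, NULL);
		info_locks[cpu].readers = 0;
	}

	/* Packet path on cpu 0: re-entry (e.g. a target re-entering the
	 * stack) just bumps the count instead of deadlocking. */
	xt_info_rdlock(0);
	xt_info_rdlock(0);	/* nested: no second lock attempt */
	xt_info_rdunlock(0);
	xt_info_rdunlock(0);

	/* Counter reader / rule replacer: visit every cpu's lock once,
	 * which also waits out any reader still using the old table. */
	for (cpu = 0; cpu < NCPU; cpu++) {
		xt_info_wrlock(cpu);
		/* ... fold this cpu's counters here ... */
		xt_info_wrunlock(cpu);
	}

	puts("ok");
	return 0;
}

That per-cpu visit is also why xt_replace_table() below can return the old table without an RCU grace period: once get_counters() has taken and released every CPU's lock, no reader can still be walking the old entries.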
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -354,9 +354,6 @@ struct xt_table
 	/* What hooks you will enter on */
 	unsigned int valid_hooks;
 
-	/* Lock for the curtain */
-	struct mutex lock;
-
 	/* Man behind the curtain... */
 	struct xt_table_info *private;
 
@@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af);
 
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
-extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
-				    struct xt_table_info *new);
+
+/*
+ * Per-CPU spinlock associated with per-cpu table entries, and
+ * with a counter for the "reading" side that allows a recursive
+ * reader to avoid taking the lock and deadlocking.
+ *
+ * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu.
+ * It needs to ensure that the rules are not being changed while the packet
+ * is being processed. In some cases, the read lock will be acquired
+ * twice on the same CPU; this is okay because of the count.
+ *
+ * "writing" is used when reading counters.
+ * During replace any readers that are using the old tables have to complete
+ * before freeing the old table. This is handled by the write locking
+ * necessary for reading the counters.
+ */
+struct xt_info_lock {
+	spinlock_t lock;
+	unsigned char readers;
+};
+DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
+
+/*
+ * Note: we need to ensure that preemption is disabled before acquiring
+ * the per-cpu-variable, so we do it as a two step process rather than
+ * using "spin_lock_bh()".
+ *
+ * We _also_ need to disable bottom half processing before updating our
+ * nesting count, to make sure that the only kind of re-entrancy is this
+ * code being called by itself: since the count+lock is not an atomic
+ * operation, we can allow no races.
+ *
+ * _Only_ that special combination of being per-cpu and never getting
+ * re-entered asynchronously means that the count is safe.
+ */
+static inline void xt_info_rdlock_bh(void)
+{
+	struct xt_info_lock *lock;
+
+	local_bh_disable();
+	lock = &__get_cpu_var(xt_info_locks);
+	if (!lock->readers++)
+		spin_lock(&lock->lock);
+}
+
+static inline void xt_info_rdunlock_bh(void)
+{
+	struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
+
+	if (!--lock->readers)
+		spin_unlock(&lock->lock);
+	local_bh_enable();
+}
+
+/*
+ * The "writer" side needs to get exclusive access to the lock,
+ * regardless of readers. This must be called with bottom half
+ * processing (and thus also preemption) disabled.
+ */
+static inline void xt_info_wrlock(unsigned int cpu)
+{
+	spin_lock(&per_cpu(xt_info_locks, cpu).lock);
+}
+
+static inline void xt_info_wrunlock(unsigned int cpu)
+{
+	spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
+}
 
 /*
  * This helper is performance critical and must be inlined
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 
-	rcu_read_lock_bh();
-	private = rcu_dereference(table->private);
-	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+	xt_info_rdlock_bh();
+	private = table->private;
+	table_base = private->entries[smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 
 			hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
 				(2 * skb->dev->addr_len);
+
 			ADD_COUNTER(e->counters, hdr_len, 1);
 
 			t = arpt_get_target(e);
@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-
-	rcu_read_unlock_bh();
+	xt_info_rdunlock_bh();
 
 	if (hotdrop)
 		return NF_DROP;
@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t,
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
 	 * with data used by 'current' CPU
-	 * We dont care about preemption here.
+	 *
+	 * Bottom half has to be disabled to prevent deadlock
+	 * if new softirq were to run and call ipt_do_table
 	 */
-	curcpu = raw_smp_processor_id();
+	local_bh_disable();
+	curcpu = smp_processor_id();
 
 	i = 0;
 	ARPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
+		xt_info_wrlock(cpu);
 		ARPT_ENTRY_ITERATE(t->entries[cpu],
 				   t->size,
 				   add_entry_to_counter,
 				   counters,
 				   &i);
+		xt_info_wrunlock(cpu);
 	}
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct arpt_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
-			 const struct xt_counters counters[])
-{
-	unsigned int i, cpu;
-
-	local_bh_disable();
-	cpu = smp_processor_id();
-	i = 0;
-	ARPT_ENTRY_ITERATE(t->entries[cpu],
-			   t->size,
-			   add_counter_to_entry,
-			   counters,
-			   &i);
 	local_bh_enable();
 }
 
-static inline int
-zero_entry_counter(struct arpt_entry *e, void *arg)
-{
-	e->counters.bcnt = 0;
-	e->counters.pcnt = 0;
-	return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
-	unsigned int cpu;
-	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
-	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
-	for_each_possible_cpu(cpu) {
-		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
-		ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
-				   zero_entry_counter, NULL);
-	}
-}
-
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		goto nomem;
+		return ERR_PTR(-ENOMEM);
 
-	info = xt_alloc_table_info(private->size);
-	if (!info)
-		goto free_counters;
-
-	clone_counters(info, private);
-
-	mutex_lock(&table->lock);
-	xt_table_entry_swap_rcu(private, info);
-	synchronize_net();	/* Wait until smoke has cleared */
-
-	get_counters(info, counters);
-	put_counters(private, counters);
-	mutex_unlock(&table->lock);
-
-	xt_free_table_info(info);
+	get_counters(private, counters);
 
 	return counters;
-
- free_counters:
-	vfree(counters);
- nomem:
-	return ERR_PTR(-ENOMEM);
 }
 
 static int copy_entries_to_user(unsigned int total_size,
@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters. */
+	/* Get the old counters, and synchronize with replace */
 	get_counters(oldinfo, counters);
+
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
 static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   int compat)
 {
-	unsigned int i;
+	unsigned int i, curcpu;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	mutex_lock(&t->lock);
+	local_bh_disable();
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
-	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[smp_processor_id()];
+	curcpu = smp_processor_id();
+	loc_cpu_entry = private->entries[curcpu];
+	xt_info_wrlock(curcpu);
 	ARPT_ENTRY_ITERATE(loc_cpu_entry,
 			   private->size,
 			   add_counter_to_entry,
 			   paddc,
 			   &i);
-	preempt_enable();
+	xt_info_wrunlock(curcpu);
 unlock_up_free:
-	mutex_unlock(&t->lock);
+	local_bh_enable();
 
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb,
 	tgpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-
-	rcu_read_lock_bh();
-	private = rcu_dereference(table->private);
-	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+	xt_info_rdlock_bh();
+	private = table->private;
+	table_base = private->entries[smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-
-	rcu_read_unlock_bh();
+	xt_info_rdunlock_bh();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t,
 
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU
-	 * We dont care about preemption here.
+	 * with data used by 'current' CPU.
+	 *
+	 * Bottom half has to be disabled to prevent deadlock
+	 * if new softirq were to run and call ipt_do_table
 	 */
-	curcpu = raw_smp_processor_id();
+	local_bh_disable();
+	curcpu = smp_processor_id();
 
 	i = 0;
 	IPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
+		xt_info_wrlock(cpu);
 		IPT_ENTRY_ITERATE(t->entries[cpu],
 				  t->size,
 				  add_entry_to_counter,
 				  counters,
 				  &i);
+		xt_info_wrunlock(cpu);
 	}
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
-			 const struct xt_counters counters[])
-{
-	unsigned int i, cpu;
-
-	local_bh_disable();
-	cpu = smp_processor_id();
-	i = 0;
-	IPT_ENTRY_ITERATE(t->entries[cpu],
-			  t->size,
-			  add_counter_to_entry,
-			  counters,
-			  &i);
 	local_bh_enable();
 }
 
-static inline int
-zero_entry_counter(struct ipt_entry *e, void *arg)
-{
-	e->counters.bcnt = 0;
-	e->counters.pcnt = 0;
-	return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
-	unsigned int cpu;
-	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
-	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
-	for_each_possible_cpu(cpu) {
-		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
-		IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
-				  zero_entry_counter, NULL);
-	}
-}
-
 static struct xt_counters * alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		goto nomem;
+		return ERR_PTR(-ENOMEM);
 
-	info = xt_alloc_table_info(private->size);
-	if (!info)
-		goto free_counters;
-
-	clone_counters(info, private);
-
-	mutex_lock(&table->lock);
-	xt_table_entry_swap_rcu(private, info);
-	synchronize_net();	/* Wait until smoke has cleared */
-
-	get_counters(info, counters);
-	put_counters(private, counters);
-	mutex_unlock(&table->lock);
-
-	xt_free_table_info(info);
+	get_counters(private, counters);
 
 	return counters;
-
- free_counters:
-	vfree(counters);
- nomem:
-	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters. */
+	/* Get the old counters, and synchronize with replace */
 	get_counters(oldinfo, counters);
+
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
 {
-	unsigned int i;
+	unsigned int i, curcpu;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
 		goto free;
 	}
 
-	mutex_lock(&t->lock);
+	local_bh_disable();
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
-	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	curcpu = smp_processor_id();
+	loc_cpu_entry = private->entries[curcpu];
+	xt_info_wrlock(curcpu);
 	IPT_ENTRY_ITERATE(loc_cpu_entry,
 			  private->size,
 			  add_counter_to_entry,
 			  paddc,
 			  &i);
-	preempt_enable();
+	xt_info_wrunlock(curcpu);
 unlock_up_free:
-	mutex_unlock(&t->lock);
+	local_bh_enable();
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb,
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
-	rcu_read_lock_bh();
-	private = rcu_dereference(table->private);
-	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+	xt_info_rdlock_bh();
+	private = table->private;
+	table_base = private->entries[smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
 #endif
-	rcu_read_unlock_bh();
+	xt_info_rdunlock_bh();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t,
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
 	 * with data used by 'current' CPU
-	 * We dont care about preemption here.
+	 *
+	 * Bottom half has to be disabled to prevent deadlock
+	 * if new softirq were to run and call ipt_do_table
 	 */
-	curcpu = raw_smp_processor_id();
+	local_bh_disable();
+	curcpu = smp_processor_id();
 
 	i = 0;
 	IP6T_ENTRY_ITERATE(t->entries[curcpu],
@@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
+		xt_info_wrlock(cpu);
 		IP6T_ENTRY_ITERATE(t->entries[cpu],
 				   t->size,
 				   add_entry_to_counter,
 				   counters,
 				   &i);
+		xt_info_wrunlock(cpu);
 	}
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ip6t_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
-			 const struct xt_counters counters[])
-{
-	unsigned int i, cpu;
-
-	local_bh_disable();
-	cpu = smp_processor_id();
-	i = 0;
-	IP6T_ENTRY_ITERATE(t->entries[cpu],
-			   t->size,
-			   add_counter_to_entry,
-			   counters,
-			   &i);
 	local_bh_enable();
 }
 
-static inline int
-zero_entry_counter(struct ip6t_entry *e, void *arg)
-{
-	e->counters.bcnt = 0;
-	e->counters.pcnt = 0;
-	return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
-	unsigned int cpu;
-	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
-	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
-	for_each_possible_cpu(cpu) {
-		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
-		IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
-				   zero_entry_counter, NULL);
-	}
-}
-
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		goto nomem;
+		return ERR_PTR(-ENOMEM);
 
-	info = xt_alloc_table_info(private->size);
-	if (!info)
-		goto free_counters;
-
-	clone_counters(info, private);
-
-	mutex_lock(&table->lock);
-	xt_table_entry_swap_rcu(private, info);
-	synchronize_net();	/* Wait until smoke has cleared */
-
-	get_counters(info, counters);
-	put_counters(private, counters);
-	mutex_unlock(&table->lock);
-
-	xt_free_table_info(info);
+	get_counters(private, counters);
 
 	return counters;
-
- free_counters:
-	vfree(counters);
- nomem:
-	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters. */
+	/* Get the old counters, and synchronize with replace */
 	get_counters(oldinfo, counters);
+
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ip6t_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len,
 		int compat)
 {
-	unsigned int i;
+	unsigned int i, curcpu;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	mutex_lock(&t->lock);
+	local_bh_disable();
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
-	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	curcpu = smp_processor_id();
+	xt_info_wrlock(curcpu);
+	loc_cpu_entry = private->entries[curcpu];
 	IP6T_ENTRY_ITERATE(loc_cpu_entry,
 			  private->size,
 			  add_counter_to_entry,
 			  paddc,
 			  &i);
-	preempt_enable();
+	xt_info_wrunlock(curcpu);
+
 unlock_up_free:
-	mutex_unlock(&t->lock);
+	local_bh_enable();
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info)
 }
 EXPORT_SYMBOL(xt_free_table_info);
 
-void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
-			     struct xt_table_info *newinfo)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu) {
-		void *p = oldinfo->entries[cpu];
-		rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
-		newinfo->entries[cpu] = p;
-	}
-
-}
-EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
-
 /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
@@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af)
 EXPORT_SYMBOL_GPL(xt_compat_unlock);
 #endif
 
+DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
+
+
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
 	      unsigned int num_counters,
 	      struct xt_table_info *newinfo,
 	      int *error)
 {
-	struct xt_table_info *oldinfo, *private;
+	struct xt_table_info *private;
 
 	/* Do the substitution. */
-	mutex_lock(&table->lock);
+	local_bh_disable();
 	private = table->private;
+
 	/* Check inside lock: is the old number correct? */
 	if (num_counters != private->number) {
 		duprintf("num_counters != table->private->number (%u/%u)\n",
 			 num_counters, private->number);
-		mutex_unlock(&table->lock);
+		local_bh_enable();
 		*error = -EAGAIN;
 		return NULL;
 	}
-	oldinfo = private;
-	rcu_assign_pointer(table->private, newinfo);
-	newinfo->initial_entries = oldinfo->initial_entries;
-	mutex_unlock(&table->lock);
 
-	synchronize_net();
-	return oldinfo;
+	table->private = newinfo;
+	newinfo->initial_entries = private->initial_entries;
+
+	/*
+	 * Even though table entries have now been swapped, other CPU's
+	 * may still be using the old entries. This is okay, because
+	 * resynchronization happens because of the locking done
+	 * during the get_counters() routine.
+	 */
+	local_bh_enable();
+
+	return private;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
 
@@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
 
 	/* Simplifies replace_table code. */
 	table->private = bootstrap;
-	mutex_init(&table->lock);
 
 	if (!xt_replace_table(table, 0, newinfo, &ret))
 		goto unlock;
@@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = {
 
 static int __init xt_init(void)
 {
-	int i, rv;
+	unsigned int i;
+	int rv;
+
+	for_each_possible_cpu(i) {
+		struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
+		spin_lock_init(&lock->lock);
+		lock->readers = 0;
+	}
 
 	xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
 	if (!xt)