Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter/IPVS fixes for net

1) ipset limits the max allocatable memory via kvmalloc() to INT_MAX,
   from Jozsef Kadlecsik.

2) Check ip_vs_conn_tab_bits value to be in the range specified
   in Kconfig, from Andrea Claudi.

3) Initialize fragment offset in ip6tables, from Jeremy Sowden.

4) Make conntrack hash chain length random, from Florian Westphal.

5) Add zone ID to conntrack and NAT hashtuple again, also from Florian.

6) Add selftests for bidirectional zone support and colliding tuples,
   from Florian Westphal.

7) Unlink table before synchronize_rcu when cleaning tables with
   owner, from Florian.

8) nf_tables limits the max allocatable memory via kvmalloc() to INT_MAX.

9) Release conntrack entries via workqueue in masquerade, from Florian.

10) Fix bogus net_init in iptables raw table definition, also from Florian.

11) Work around missing softdep in log extensions, from Florian Westphal.

12) Serialize hash resizes and cleanups with mutex, from Eric Dumazet.

* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf:
  netfilter: conntrack: serialize hash resizes and cleanups
  netfilter: log: work around missing softdep backend module
  netfilter: iptable_raw: drop bogus net_init annotation
  netfilter: nf_nat_masquerade: defer conntrack walk to work queue
  netfilter: nf_nat_masquerade: make async masq_inet6_event handling generic
  netfilter: nf_tables: Fix oversized kvmalloc() calls
  netfilter: nf_tables: unlink table before deleting it
  selftests: netfilter: add zone stress test with colliding tuples
  selftests: netfilter: add selftest for directional zone support
  netfilter: nat: include zone id in nat table hash again
  netfilter: conntrack: include zone id in tuple hash again
  netfilter: conntrack: make max chain length random
  netfilter: ip6_tables: zero-initialize fragment offset
  ipvs: check that ip_vs_conn_tab_bits is between 8 and 20
  netfilter: ipset: Fix oversized kvmalloc() calls
====================

Link: https://lore.kernel.org/r/20210924221113.348767-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2021-09-24 17:27:19 -07:00
commit 7fe7f3182a
13 changed files with 735 additions and 147 deletions

View File

@ -42,7 +42,7 @@ iptable_raw_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *rawtable_ops __read_mostly;
static int __net_init iptable_raw_table_init(struct net *net)
static int iptable_raw_table_init(struct net *net)
{
struct ipt_replace *repl;
const struct xt_table *table = &packet_raw;

View File

@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb,
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
acpar.fragoff = 0;
acpar.hotdrop = false;
acpar.state = state;

View File

@ -130,11 +130,11 @@ htable_size(u8 hbits)
{
size_t hsize;
/* We must fit both into u32 in jhash and size_t */
/* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;

View File

@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void)
int idx;
/* Compute size and mask */
if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
}
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;

View File

@ -74,10 +74,14 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
/* serialize hash resizes and nf_ct_iterate_cleanup */
static DEFINE_MUTEX(nf_conntrack_mutex);
#define GC_SCAN_INTERVAL (120u * HZ)
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
#define MAX_CHAINLEN 64u
#define MIN_CHAINLEN 8u
#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
static struct conntrack_gc_work conntrack_gc_work;
@ -188,11 +192,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
unsigned int zoneid,
const struct net *net)
{
struct {
struct nf_conntrack_man src;
union nf_inet_addr dst_addr;
unsigned int zone;
u32 net_mix;
u16 dport;
u16 proto;
@ -205,6 +211,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
/* The direction must be ignored, so handle usable members manually. */
combined.src = tuple->src;
combined.dst_addr = tuple->dst.u3;
combined.zone = zoneid;
combined.net_mix = net_hash_mix(net);
combined.dport = (__force __u16)tuple->dst.u.all;
combined.proto = tuple->dst.protonum;
@ -219,15 +226,17 @@ static u32 scale_hash(u32 hash)
static u32 __hash_conntrack(const struct net *net,
const struct nf_conntrack_tuple *tuple,
unsigned int zoneid,
unsigned int size)
{
return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
}
static u32 hash_conntrack(const struct net *net,
const struct nf_conntrack_tuple *tuple)
const struct nf_conntrack_tuple *tuple,
unsigned int zoneid)
{
return scale_hash(hash_conntrack_raw(tuple, net));
return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
}
static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
@ -650,9 +659,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
clean_from_lists(ct);
@ -819,8 +830,20 @@ struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
return __nf_conntrack_find_get(net, zone, tuple,
hash_conntrack_raw(tuple, net));
unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
struct nf_conntrack_tuple_hash *thash;
thash = __nf_conntrack_find_get(net, zone, tuple,
hash_conntrack_raw(tuple, zone_id, net));
if (thash)
return thash;
rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
if (rid != zone_id)
return __nf_conntrack_find_get(net, zone, tuple,
hash_conntrack_raw(tuple, rid, net));
return thash;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@ -842,6 +865,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
unsigned int max_chainlen;
unsigned int chainlen = 0;
unsigned int sequence;
int err = -EEXIST;
@ -852,18 +876,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
if (chainlen++ > MAX_CHAINLEN)
if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@ -873,7 +901,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
if (chainlen++ > MAX_CHAINLEN)
if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@ -1103,8 +1131,8 @@ drop:
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
unsigned int chainlen = 0, sequence, max_chainlen;
const struct nf_conntrack_zone *zone;
unsigned int chainlen = 0, sequence;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@ -1133,8 +1161,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
hash = scale_hash(hash);
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
@ -1168,6 +1196,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto dying;
}
max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
@ -1175,7 +1204,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
if (chainlen++ > MAX_CHAINLEN)
if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@ -1184,7 +1213,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
if (chainlen++ > MAX_CHAINLEN) {
if (chainlen++ > max_chainlen) {
chaintoolong:
nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, chaintoolong);
@ -1246,7 +1275,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
rcu_read_lock();
begin:
nf_conntrack_get_ht(&ct_hash, &hsize);
hash = __hash_conntrack(net, tuple, hsize);
hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
@ -1687,8 +1716,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
u32 hash, zone_id, rid;
struct nf_conn *ct;
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, state->pf, protonum, state->net,
@ -1699,8 +1728,20 @@ resolve_normal_ct(struct nf_conn *tmpl,
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
hash = hash_conntrack_raw(&tuple, state->net);
zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
hash = hash_conntrack_raw(&tuple, zone_id, state->net);
h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
if (!h) {
rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
if (zone_id != rid) {
u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
}
}
if (!h) {
h = init_conntrack(state->net, tmpl, &tuple,
skb, dataoff, hash);
@ -2225,28 +2266,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
spinlock_t *lockp;
for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
if (hlist_nulls_empty(hslot))
continue;
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
nf_conntrack_lock(lockp);
if (*bucket < nf_conntrack_htable_size) {
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
continue;
/* All nf_conn objects are added to hash table twice, one
* for original direction tuple, once for the reply tuple.
*
* Exception: In the IPS_NAT_CLASH case, only the reply
* tuple is added (the original tuple already existed for
* a different object).
*
* We only need to call the iterator once for each
* conntrack, so we just use the 'reply' direction
* tuple while iterating.
*/
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
goto found;
}
hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
continue;
/* All nf_conn objects are added to hash table twice, one
* for original direction tuple, once for the reply tuple.
*
* Exception: In the IPS_NAT_CLASH case, only the reply
* tuple is added (the original tuple already existed for
* a different object).
*
* We only need to call the iterator once for each
* conntrack, so we just use the 'reply' direction
* tuple while iterating.
*/
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
goto found;
}
spin_unlock(lockp);
local_bh_enable();
@ -2264,26 +2308,20 @@ found:
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
{
unsigned int bucket = 0, sequence;
unsigned int bucket = 0;
struct nf_conn *ct;
might_sleep();
for (;;) {
sequence = read_seqcount_begin(&nf_conntrack_generation);
mutex_lock(&nf_conntrack_mutex);
while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
nf_ct_delete(ct, portid, report);
nf_ct_put(ct);
cond_resched();
}
if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
break;
bucket = 0;
nf_ct_delete(ct, portid, report);
nf_ct_put(ct);
cond_resched();
}
mutex_unlock(&nf_conntrack_mutex);
}
struct iter_data {
@ -2519,8 +2557,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
if (!hash)
return -ENOMEM;
mutex_lock(&nf_conntrack_mutex);
old_size = nf_conntrack_htable_size;
if (old_size == hashsize) {
mutex_unlock(&nf_conntrack_mutex);
kvfree(hash);
return 0;
}
@ -2537,12 +2577,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
for (i = 0; i < nf_conntrack_htable_size; i++) {
while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
unsigned int zone_id;
h = hlist_nulls_entry(nf_conntrack_hash[i].first,
struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
bucket = __hash_conntrack(nf_ct_net(ct),
&h->tuple, hashsize);
&h->tuple, zone_id, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
@ -2556,6 +2600,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
nf_conntrack_all_unlock();
local_bh_enable();
mutex_unlock(&nf_conntrack_mutex);
synchronize_net();
kvfree(old_hash);
return 0;

View File

@ -150,13 +150,16 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
/* We keep an extra hash for each conntrack, for fast searching. */
static unsigned int
hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
hash_by_src(const struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
struct {
struct nf_conntrack_man src;
u32 net_mix;
u32 protonum;
u32 zone;
} __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
@ -165,9 +168,13 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
/* Original src, to ensure we map it consistently if poss. */
combined.src = tuple->src;
combined.net_mix = net_hash_mix(n);
combined.net_mix = net_hash_mix(net);
combined.protonum = tuple->dst.protonum;
/* Zone ID can be used provided its valid for both directions */
if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
combined.zone = zone->id;
hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
return reciprocal_scale(hash, nf_nat_htable_size);
@ -272,7 +279,7 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range2 *range)
{
unsigned int h = hash_by_src(net, tuple);
unsigned int h = hash_by_src(net, zone, tuple);
const struct nf_conn *ct;
hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
@ -619,7 +626,7 @@ nf_nat_setup_info(struct nf_conn *ct,
unsigned int srchash;
spinlock_t *lock;
srchash = hash_by_src(net,
srchash = hash_by_src(net, nf_ct_zone(ct),
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
spin_lock_bh(lock);
@ -788,7 +795,7 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
unsigned int h;
h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
hlist_del_rcu(&ct->nat_bysource);
spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);

View File

@ -9,8 +9,19 @@
#include <net/netfilter/nf_nat_masquerade.h>
/* One deferred conntrack cleanup request.
 *
 * Allocated and queued by nf_nat_masq_schedule(); consumed and freed by
 * iterate_cleanup_work().
 */
struct masq_dev_work {
/* work item; iterate_cleanup_work() recovers the request via container_of() */
struct work_struct work;
/* namespace to walk; reference taken when queued, dropped by the worker */
struct net *net;
/* address being removed; left zeroed when the caller passes no address */
union nf_inet_addr addr;
/* interface index that device_cmp() matches against nat->masq_index */
int ifindex;
/* match callback handed to nf_ct_iterate_cleanup_net(); gets this struct as data */
int (*iter)(struct nf_conn *i, void *data);
};
#define MAX_MASQ_WORKER_COUNT 16
static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
static int device_cmp(struct nf_conn *i, void *ifindex)
/* Work handler for a request queued by nf_nat_masq_schedule().
 *
 * Runs the conntrack walk with the stored callback, then releases
 * everything the scheduling side acquired: the namespace reference, the
 * request memory, the worker count and the module reference.
 */
static void iterate_cleanup_work(struct work_struct *work)
{
struct masq_dev_work *w;
w = container_of(work, struct masq_dev_work, work);
/* the request itself is the iterator's data argument */
nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
/* undo maybe_get_net(), kzalloc(), atomic_inc() and try_module_get() */
put_net(w->net);
kfree(w);
atomic_dec(&masq_worker_count);
module_put(THIS_MODULE);
}
/* Iterate conntrack table in the background and remove conntrack entries
 * that use the device/address being removed.
 *
 * In case too many work items have been queued already or memory allocation
 * fails iteration is skipped, conntrack entries will time out eventually.
 *
 * @net:       namespace whose conntrack table is walked
 * @addr:      address being removed, or NULL when only the device matters
 * @ifindex:   index of the affected interface
 * @iter:      match callback run for each conntrack entry by the worker
 * @gfp_flags: allocation flags for the request; callers in this file pass
 *             GFP_KERNEL or GFP_ATOMIC
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
int ifindex,
int (*iter)(struct nf_conn *i, void *data),
gfp_t gfp_flags)
{
struct masq_dev_work *w;
/* too many requests already queued: skip this one */
if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
return;
/* no namespace reference obtained: nothing to schedule */
net = maybe_get_net(net);
if (!net)
return;
/* hold a module reference for the worker; it is dropped in iterate_cleanup_work() */
if (!try_module_get(THIS_MODULE))
goto err_module;
w = kzalloc(sizeof(*w), gfp_flags);
if (w) {
/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
atomic_inc(&masq_worker_count);
INIT_WORK(&w->work, iterate_cleanup_work);
w->ifindex = ifindex;
w->net = net;
w->iter = iter;
/* without an address, w->addr stays zeroed from kzalloc() */
if (addr)
w->addr = *addr;
/* the worker now owns the request and the references taken above */
schedule_work(&w->work);
return;
}
/* allocation failed: release the module and namespace references */
module_put(THIS_MODULE);
err_module:
put_net(net);
}
static int device_cmp(struct nf_conn *i, void *arg)
{
const struct nf_conn_nat *nat = nfct_nat(i);
const struct masq_dev_work *w = arg;
if (!nat)
return 0;
return nat->masq_index == (int)(long)ifindex;
return nat->masq_index == w->ifindex;
}
static int masq_device_event(struct notifier_block *this,
@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this,
* and forget them.
*/
nf_ct_iterate_cleanup_net(net, device_cmp,
(void *)(long)dev->ifindex, 0, 0);
nf_nat_masq_schedule(net, NULL, dev->ifindex,
device_cmp, GFP_KERNEL);
}
return NOTIFY_DONE;
@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this,
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct net_device *dev = ifa->ifa_dev->dev;
struct nf_conntrack_tuple *tuple;
struct masq_dev_work *w = ptr;
if (!device_cmp(ct, (void *)(long)dev->ifindex))
if (!device_cmp(ct, ptr))
return 0;
tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
return ifa->ifa_address == tuple->dst.u3.ip;
return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}
static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
struct net *net = dev_net(idev->dev);
const struct in_ifaddr *ifa = ptr;
const struct in_device *idev;
const struct net_device *dev;
union nf_inet_addr addr;
if (event != NETDEV_DOWN)
return NOTIFY_DONE;
/* The masq_dev_notifier will catch the case of the device going
* down. So if the inetdev is dead and being destroyed we have
* no work to do. Otherwise this is an individual address removal
* and we have to perform the flush.
*/
idev = ifa->ifa_dev;
if (idev->dead)
return NOTIFY_DONE;
if (event == NETDEV_DOWN)
nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
memset(&addr, 0, sizeof(addr));
addr.ip = ifa->ifa_address;
dev = idev->dev;
nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
inet_cmp, GFP_KERNEL);
return NOTIFY_DONE;
}
@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = {
};
#if IS_ENABLED(CONFIG_IPV6)
static atomic_t v6_worker_count __read_mostly;
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
const struct in6_addr *daddr, unsigned int srcprefs,
@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
struct masq_dev_work {
struct work_struct work;
struct net *net;
struct in6_addr addr;
int ifindex;
};
static int inet6_cmp(struct nf_conn *ct, void *work)
{
struct masq_dev_work *w = (struct masq_dev_work *)work;
struct nf_conntrack_tuple *tuple;
if (!device_cmp(ct, (void *)(long)w->ifindex))
return 0;
tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
}
static void iterate_cleanup_work(struct work_struct *work)
{
struct masq_dev_work *w;
w = container_of(work, struct masq_dev_work, work);
nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
put_net(w->net);
kfree(w);
atomic_dec(&v6_worker_count);
module_put(THIS_MODULE);
}
/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
*
* Defer it to the system workqueue.
@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this,
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
struct masq_dev_work *w;
struct net *net;
union nf_inet_addr addr;
if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
if (event != NETDEV_DOWN)
return NOTIFY_DONE;
dev = ifa->idev->dev;
net = maybe_get_net(dev_net(dev));
if (!net)
return NOTIFY_DONE;
if (!try_module_get(THIS_MODULE))
goto err_module;
memset(&addr, 0, sizeof(addr));
w = kmalloc(sizeof(*w), GFP_ATOMIC);
if (w) {
atomic_inc(&v6_worker_count);
addr.in6 = ifa->addr;
INIT_WORK(&w->work, iterate_cleanup_work);
w->ifindex = dev->ifindex;
w->net = net;
w->addr = ifa->addr;
schedule_work(&w->work);
return NOTIFY_DONE;
}
module_put(THIS_MODULE);
err_module:
put_net(net);
nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
GFP_ATOMIC);
return NOTIFY_DONE;
}

View File

@ -4336,7 +4336,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (ops->privsize != NULL)
size = ops->privsize(nla, &desc);
alloc_size = sizeof(*set) + size + udlen;
if (alloc_size < size)
if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
set = kvzalloc(alloc_size, GFP_KERNEL);
if (!set)
@ -9599,7 +9599,6 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
table->use--;
nf_tables_chain_destroy(&ctx);
}
list_del(&table->list);
nf_tables_table_destroy(&ctx);
}
@ -9612,6 +9611,8 @@ static void __nft_release_tables(struct net *net)
if (nft_table_has_owner(table))
continue;
list_del(&table->list);
__nft_release_table(net, table);
}
}
@ -9619,31 +9620,38 @@ static void __nft_release_tables(struct net *net)
static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct nft_table *table, *to_delete[8];
struct nftables_pernet *nft_net;
struct netlink_notify *n = ptr;
struct nft_table *table, *nt;
struct net *net = n->net;
bool release = false;
unsigned int deleted;
bool restart = false;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
nft_net = nft_pernet(net);
deleted = 0;
mutex_lock(&nft_net->commit_mutex);
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid) {
__nft_release_hook(net, table);
release = true;
list_del_rcu(&table->list);
to_delete[deleted++] = table;
if (deleted >= ARRAY_SIZE(to_delete))
break;
}
}
if (release) {
if (deleted) {
restart = deleted >= ARRAY_SIZE(to_delete);
synchronize_rcu();
list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid)
__nft_release_table(net, table);
}
while (deleted)
__nft_release_table(net, to_delete[--deleted]);
if (restart)
goto again;
}
mutex_unlock(&nft_net->commit_mutex);

View File

@ -19,6 +19,7 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
/* Used for matches where *info is larger than X byte */
#define NFT_MATCH_LARGE_THRESH 192
@ -257,8 +258,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_compat_wait_for_destructors();
ret = xt_check_target(&par, size, proto, inv);
if (ret < 0)
if (ret < 0) {
if (ret == -ENOENT) {
const char *modname = NULL;
if (strcmp(target->name, "LOG") == 0)
modname = "nf_log_syslog";
else if (strcmp(target->name, "NFLOG") == 0)
modname = "nfnetlink_log";
if (modname &&
nft_request_module(ctx->net, "%s", modname) == -EAGAIN)
return -EAGAIN;
}
return ret;
}
/* The standard target cannot be used */
if (!target->target)

View File

@ -44,6 +44,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int log_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
int ret;
if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6)
return -EINVAL;
@ -58,7 +59,14 @@ static int log_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
if (ret != 0 && !par->nft_compat) {
request_module("%s", "nf_log_syslog");
ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
}
return ret;
}
static void log_tg_destroy(const struct xt_tgdtor_param *par)

View File

@ -42,13 +42,21 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int nflog_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
int ret;
if (info->flags & ~XT_NFLOG_MASK)
return -EINVAL;
if (info->prefix[sizeof(info->prefix) - 1] != '\0')
return -EINVAL;
return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
if (ret != 0 && !par->nft_compat) {
request_module("%s", "nfnetlink_log");
ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
}
return ret;
}
static void nflog_tg_destroy(const struct xt_tgdtor_param *par)

View File

@ -0,0 +1,309 @@
#!/bin/bash
#
# Test connection tracking zone and NAT source port reallocation support.
#
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
# Don't increase too much, 2000 clients should work
# just fine but script can then take several minutes with
# KASAN/debug builds.
maxclients=100
have_iperf=1
ret=0
# client1---.
# veth1-.
# |
# NAT Gateway --veth0--> Server
# | |
# veth2-' |
# client2---' |
# .... |
# clientX----vethX---'
# All clients share identical IP address.
# NAT Gateway uses policy routing and conntrack zones to isolate client
# namespaces. Each client connects to Server, each with colliding tuples:
# clientsaddr:10000 -> serveraddr:dport
# NAT Gateway is supposed to do port reallocation for each of the
# connections.
sfx=$(mktemp -u "XXXXXXXX")
gw="ns-gw-$sfx"
cl1="ns-cl1-$sfx"
cl2="ns-cl2-$sfx"
srv="ns-srv-$sfx"
v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
cleanup()
{
ip netns del $gw
ip netns del $srv
for i in $(seq 1 $maxclients); do
ip netns del ns-cl$i-$sfx 2>/dev/null
done
sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
}
nft --version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
exit $ksft_skip
fi
conntrack -V > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without conntrack tool"
exit $ksft_skip
fi
iperf3 -v >/dev/null 2>&1
if [ $? -ne 0 ];then
have_iperf=0
fi
ip netns add "$gw"
if [ $? -ne 0 ];then
echo "SKIP: Could not create net namespace $gw"
exit $ksft_skip
fi
ip -net "$gw" link set lo up
trap cleanup EXIT
ip netns add "$srv"
if [ $? -ne 0 ];then
echo "SKIP: Could not create server netns $srv"
exit $ksft_skip
fi
ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
ip -net "$gw" link set veth0 up
ip -net "$srv" link set lo up
ip -net "$srv" link set eth0 up
sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
for i in $(seq 1 $maxclients);do
cl="ns-cl$i-$sfx"
ip netns add "$cl"
if [ $? -ne 0 ];then
echo "SKIP: Could not create client netns $cl"
exit $ksft_skip
fi
ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
done
for i in $(seq 1 $maxclients);do
cl="ns-cl$i-$sfx"
echo netns exec "$cl" ip link set lo up
echo netns exec "$cl" ip link set eth0 up
echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
echo netns exec "$gw" ip link set veth$i up
echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
# clients have same IP addresses.
echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
# NB: same addresses on client-facing interfaces.
echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
# gw: policy routing
echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
done | ip -batch /dev/stdin
ip -net "$gw" addr add 10.3.0.1/24 dev veth0
ip -net "$gw" addr add dead:3::1/64 dev veth0
ip -net "$srv" addr add 10.3.0.99/24 dev eth0
ip -net "$srv" addr add dead:3::99/64 dev eth0
# Load the gateway ruleset (table "inet raw"):
#  - maps iiftomark / iiftozone are declared empty here and are keyed by
#    input interface name;
#  - sets inicmp / inflows / inflows6 are dynamic and get one counted
#    element per observed flow, keyed (among others) by input interface;
#  - chain prerouting (priority -64000) sets the original-direction ct zone
#    and the packet mark from the input interface name, then records
#    TCP packets with ACK-but-not-SYN (IPv4), IPv6 TCP and IPv4 ICMP
#    packets in the sets above;
#  - chain nat_postrouting copies the packet mark to the ct mark and
#    masquerades whatever leaves via veth0;
#  - chain mangle_prerouting restores the packet mark from the ct mark
#    for reply-direction packets.
ip netns exec $gw nft -f /dev/stdin<<EOF
table inet raw {
map iiftomark {
type ifname : mark
}
map iiftozone {
typeof iifname : ct zone
}
set inicmp {
flags dynamic
type ipv4_addr . ifname . ipv4_addr
}
set inflows {
flags dynamic
type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
}
set inflows6 {
flags dynamic
type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
}
chain prerouting {
type filter hook prerouting priority -64000; policy accept;
ct original zone set meta iifname map @iiftozone
meta mark set meta iifname map @iiftomark
tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
}
chain nat_postrouting {
type nat hook postrouting priority 0; policy accept;
ct mark set meta mark meta oifname veth0 masquerade
}
chain mangle_prerouting {
type filter hook prerouting priority -100; policy accept;
ct direction reply meta mark set ct mark
}
}
EOF
# Fill both interface-keyed maps with the same content: "vethN" : N for
# every client.  The last element is printed separately because it closes
# the element list instead of ending in a comma.
for mapname in iiftomark iiftozone; do
	echo "add element inet raw $mapname {"
	for i in $(seq 1 $((maxclients-1))); do
		echo "\"veth$i\" : $i,"
	done
	echo "\"veth$maxclients\" : $maxclients }"
done | ip netns exec $gw nft -f /dev/stdin
# Gateway: enable forwarding for both address families and set rp_filter to 0.
for knob in net.ipv4.conf.all.forwarding=1 \
	    net.ipv6.conf.all.forwarding=1 \
	    net.ipv4.conf.all.rp_filter=0; do
	ip netns exec "$gw" sysctl -q $knob > /dev/null
done

# useful for debugging: allows to use 'ping' from clients to gateway.
for knob in net.ipv4.fwmark_reflect=1 net.ipv6.fwmark_reflect=1; do
	ip netns exec "$gw" sysctl -q $knob > /dev/null
done
# Ping the server from every client namespace in parallel.
#
# "$?" evaluated right after "cmd &" only says that the job was started and
# is always 0, so the exit status of each ping has to be fetched with
# "wait <pid>" once all of them have been launched.
pingjobs=""
for i in $(seq 1 $maxclients); do
	cl="ns-cl$i-$sfx"
	ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
	# remember "<pid>:<namespace>" so a failure can be attributed
	pingjobs="$pingjobs $!:$cl"
done

for job in $pingjobs; do
	if ! wait "${job%%:*}"; then
		echo "FAIL: Ping failure from ${job#*:}" 1>&2
		ret=1
	fi
done

# reap anything else still running in the background
wait
# Every client interface must have its own inicmp element showing exactly
# the three echo requests (252 bytes) sent by that client.  Stop at the
# first mismatch and dump the element that was actually found.
for i in $(seq 1 $maxclients); do
	elem="{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }"

	if ! ip netns exec $gw nft get element inet raw inicmp "$elem" |
	     grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"; then
		ret=1
		echo "FAIL: counter icmp mismatch for veth$i" 1>&2
		ip netns exec $gw nft get element inet raw inicmp "$elem" 1>&2
		break
	fi
done
# The replies of all clients share one inicmp element: source 10.3.0.99,
# input interface veth0, destination 10.3.0.1.  Its counter must therefore
# account for three packets (252 bytes) per client.
#
# The key is kept in one variable so that the diagnostic query below asks
# for the very same element as the check itself.
srvelem="{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }"
srvwant="{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"

if ! ip netns exec $gw nft get element inet raw inicmp "$srvelem" | grep -q "$srvwant"; then
	ret=1
	# failure diagnostics go to stderr like every other FAIL message here
	echo "FAIL: counter icmp mismatch for veth0: $srvwant" 1>&2
	ip netns exec $gw nft get element inet raw inicmp "$srvelem" 1>&2
fi

if [ $ret -eq 0 ]; then
	echo "PASS: ping test from all $maxclients namespaces"
fi
# Without iperf3 the TCP part cannot run: report an earlier failure if there
# was one, otherwise exit with the kselftest SKIP code.
if [ $have_iperf -eq 0 ]; then
	echo "SKIP: iperf3 not installed"
	[ $ret -ne 0 ] && exit $ret
	exit $ksft_skip
fi
# Start an iperf3 server in the server namespace, give it a second to come
# up, then connect from each client in turn.  Every client uses the same
# source port (10000); nothing is attempted once $ret is non-zero.
ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
iperfpid=$!
sleep 1

for i in $(seq 1 $maxclients); do
	[ $ret -ne 0 ] && break

	cl="ns-cl$i-$sfx"
	if ! ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null; then
		echo FAIL: Failure to connect for $cl 1>&2
		# conntrack statistics of the gateway, as a debugging aid
		ip netns exec $gw conntrack -S 1>&2
		ret=1
	fi
done

if [ $ret -eq 0 ]; then
	echo "PASS: iperf3 connections for all $maxclients net namespaces"
fi

# Stop the server and wait until it is gone.
kill $iperfpid
wait
# The gateway must have recorded one inflows element per client interface,
# all with identical addresses and ports (10.1.0.3:10000 -> 10.3.0.99:5201).
for i in $(seq 1 $maxclients); do
	flow="{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }"

	if ! ip netns exec $gw nft get element inet raw inflows "$flow" > /dev/null; then
		ret=1
		echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
		break
	fi
done

[ $ret -eq 0 ] && echo "PASS: Found client connection for all $maxclients net namespaces"
# Packets coming back from the server (10.3.0.99 port 5201) must have been
# seen on veth0 addressed to 10.3.0.1 port 10000.
if ! ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null; then
	echo "FAIL: cannot find return entry on veth0" 1>&2
	ret=1
fi

exit $ret

View File

@@ -0,0 +1,156 @@
#!/bin/bash
# Test insertion speed for packets with identical addresses/ports
# that are all placed in distinct conntrack zones.
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Namespace name with a random suffix; "mktemp -u" only generates the name.
sfx=$(mktemp -u "XXXXXXXX")
ns="ns-$sfx"

# Number of zones (and thus conntrack entries) each test tries to create.
zones=20000

# Set to 1 further down when the conntrack tool is usable.
have_ct_tool=0

# Overall result, becomes 1 as soon as any check fails.
ret=0
# Delete the test namespace; installed as the EXIT trap handler.
cleanup() {
	ip netns del "$ns"
}
# Create the test namespace; without it nothing can be tested, so SKIP.
# The message names $ns: this script has no $gw variable.
if ! ip netns add "$ns"; then
	echo "SKIP: Could not create net namespace $ns"
	exit $ksft_skip
fi

# From here on the namespace exists and must be removed on every exit path.
trap cleanup EXIT
# The conntrack tool is optional: remember whether "conntrack -V" works.
if conntrack -V > /dev/null 2>&1; then
	have_ct_tool=1
fi

ip -net "$ns" link set lo up
# test_zones <max_zones>
#
# Create conntrack entries from the packet path: UDP packets that all carry
# identical addresses and ports are sent over loopback while an nft output
# rule assigns the ct zone through the "rndzone" map, which is filled with
# the entries 1..<max_zones>.  Packets are sent in rounds of 10000 and each
# round is timed.  When the conntrack tool is available the final entry
# count is compared against <max_zones>.  Sets the global $ret to 1 on
# failure.
test_zones() {
	local max_zones=$1
	local sent=0 round=0
	local t_total t_round stop duration count

	# long timeout so that no entry expires while the test is running
	ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600

	ip netns exec $ns nft -f /dev/stdin<<EOF
flush ruleset
table inet raw {
map rndzone {
typeof numgen inc mod $max_zones : ct zone
}
chain output {
type filter hook output priority -64000; policy accept;
udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
}
}
EOF

	# One "N : N" element per zone; the last one closes the list.
	{
		echo "add element inet raw rndzone {"
		for i in $(seq 1 $max_zones); do
			if [ $i -lt $max_zones ]; then
				printf '%s : %s,\n' "$i" "$i"
			else
				printf '%s : %s}\n' "$i" "$i"
			fi
		done
	} | ip netns exec $ns nft -f /dev/stdin

	t_total=$(date +%s%3N)
	stop=$t_total

	while [ $sent -lt $max_zones ]; do
		t_round=$(date +%s%3N)
		sent=$((sent + 10000))
		round=$((round + 1))

		# 10000 writes of 8k each, fed to nc which sends them as UDP
		if ! dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null |
		     ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null; then
			ret=1
			break
		fi

		stop=$(date +%s%3N)
		duration=$((stop-t_round))
		echo "PASS: added 10000 entries in $duration ms (now $sent total, loop $round)"
	done

	if [ $have_ct_tool -eq 1 ]; then
		count=$(ip netns exec "$ns" conntrack -C)
		duration=$((stop-t_total))

		if [ $count -eq $max_zones ]; then
			echo "PASS: inserted $count entries from packet path in $duration ms total"
		else
			ip netns exec $ns conntrack -S 1>&2
			echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
			ret=1
		fi
	fi

	if [ $ret -ne 0 ]; then
		echo "FAIL: insert $max_zones entries from packet path" 1>&2
	fi
}
# test_conntrack_tool <max_zones>
#
# Flush the conntrack table, then insert the same TCP tuple once per zone
# (zones 1..<max_zones>) with "conntrack -I".  Progress and timing are
# printed after every 10000 insertions; afterwards the entry count reported
# by "conntrack -C" must equal <max_zones>.  Sets the global $ret to 1 on
# failure.
test_conntrack_tool() {
	local max_zones=$1
	local zone=0
	local t_total t_chunk stop duration count

	ip netns exec $ns conntrack -F > /dev/null 2>&1

	t_total=$(date +%s%3N)
	t_chunk=$(date +%s%3N)
	stop=$t_chunk

	while [ $zone -lt $max_zones ]; do
		zone=$((zone + 1))

		if ! ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
			--timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $zone >/dev/null 2>&1; then
			# run it once more with stderr left alone so the error is shown
			ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
				--timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $zone > /dev/null
			echo "FAIL: conntrack -I returned an error"
			ret=1
			break
		fi

		if [ $((zone%10000)) -eq 0 ]; then
			stop=$(date +%s%3N)
			duration=$((stop-t_chunk))
			echo "PASS: added 10000 entries in $duration ms (now $zone total)"
			t_chunk=$stop
		fi
	done

	count=$(ip netns exec "$ns" conntrack -C)
	duration=$((stop-t_total))

	if [ $count -eq $max_zones ]; then
		echo "PASS: inserted $count entries via ctnetlink in $duration ms"
	else
		ip netns exec $ns conntrack -S 1>&2
		echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
		ret=1
	fi
}
# Packet-path test always runs; the ctnetlink test needs the conntrack tool.
test_zones $zones

if [ $have_ct_tool -ne 1 ]; then
	echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
	# a failure of the first test takes precedence over the SKIP code
	[ $ret -eq 0 ] && exit $ksft_skip
	exit $ret
fi

test_conntrack_tool $zones
exit $ret