Merge branch 'ipv4-avoid-pathological-hash-tables'

Eric Dumazet says:

====================
ipv4: avoid pathological hash tables

This series speeds up netns dismantles on hosts
having many active netns, by making sure two hash tables
used for IPV4 fib contains uniformly spread items.

v2: changed second patch to add fib_info_laddrhash_bucket()
    for consistency (David Ahern suggestion).
====================

Link: https://lore.kernel.org/r/20220119100413.4077866-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2022-01-19 08:14:42 -08:00
commit ff9fc0a31d

View File

@ -29,6 +29,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/netlink.h>
#include <linux/hash.h>
#include <net/arp.h>
#include <net/ip.h>
@ -51,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
#define DEVINDEX_HASHBITS 8
@ -319,11 +321,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
unsigned int mask = DEVINDEX_HASHSIZE - 1;
return hash_32(val, DEVINDEX_HASHBITS);
}
return (val ^
(val >> DEVINDEX_HASHBITS) ^
(val >> (DEVINDEX_HASHBITS * 2))) & mask;
static struct hlist_head *
fib_info_devhash_bucket(const struct net_device *dev)
{
u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
return &fib_info_devhash[fib_devindex_hashfn(val)];
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@ -433,12 +439,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
unsigned int hash;
spin_lock(&fib_info_lock);
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
head = fib_info_devhash_bucket(dev);
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev &&
nh->fib_nh_gw4 == gw &&
@ -1243,13 +1248,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
static inline unsigned int fib_laddr_hashfn(__be32 val)
static struct hlist_head *
fib_info_laddrhash_bucket(const struct net *net, __be32 val)
{
unsigned int mask = (fib_info_hash_size - 1);
u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
fib_info_hash_bits);
return ((__force u32)val ^
((__force u32)val >> 7) ^
((__force u32)val >> 14)) & mask;
return &fib_info_laddrhash[slot];
}
static struct hlist_head *fib_info_hash_alloc(int bytes)
@ -1285,6 +1290,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
fib_info_hash_bits = ilog2(new_size);
for (i = 0; i < old_size; i++) {
struct hlist_head *head = &fib_info_hash[i];
@ -1302,21 +1308,20 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
fib_info_hash = new_info_hash;
fib_info_laddrhash = new_laddrhash;
for (i = 0; i < old_size; i++) {
struct hlist_head *lhead = &fib_info_laddrhash[i];
struct hlist_head *lhead = &old_laddrhash[i];
struct hlist_node *n;
struct fib_info *fi;
hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
struct hlist_head *ldest;
unsigned int new_hash;
new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
ldest = &new_laddrhash[new_hash];
ldest = fib_info_laddrhash_bucket(fi->fib_net,
fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, ldest);
}
}
fib_info_laddrhash = new_laddrhash;
spin_unlock_bh(&fib_info_lock);
@ -1601,7 +1606,7 @@ link_it:
if (fi->fib_prefsrc) {
struct hlist_head *head;
head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
hlist_add_head(&fi->fib_lhash, head);
}
if (fi->nh) {
@ -1609,12 +1614,10 @@ link_it:
} else {
change_nexthops(fi) {
struct hlist_head *head;
unsigned int hash;
if (!nexthop_nh->fib_nh_dev)
continue;
hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
head = &fib_info_devhash[hash];
head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
hlist_add_head(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
@ -1875,16 +1878,16 @@ nla_put_failure:
*/
int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
int ret = 0;
unsigned int hash = fib_laddr_hashfn(local);
struct hlist_head *head = &fib_info_laddrhash[hash];
int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
struct net *net = dev_net(dev);
struct hlist_head *head;
struct fib_info *fi;
int ret = 0;
if (!fib_info_laddrhash || local == 0)
return 0;
head = fib_info_laddrhash_bucket(net, local);
hlist_for_each_entry(fi, head, fib_lhash) {
if (!net_eq(fi->fib_net, net) ||
fi->fib_tb_id != tb_id)
@ -1966,8 +1969,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
unsigned int hash = fib_devindex_hashfn(dev->ifindex);
struct hlist_head *head = &fib_info_devhash[hash];
struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
@ -1986,12 +1988,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
int ret = 0;
int scope = RT_SCOPE_NOWHERE;
struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_info *prev_fi = NULL;
unsigned int hash = fib_devindex_hashfn(dev->ifindex);
struct hlist_head *head = &fib_info_devhash[hash];
int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
int ret = 0;
if (force)
scope = -1;
@ -2136,7 +2137,6 @@ out:
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct fib_info *prev_fi;
unsigned int hash;
struct hlist_head *head;
struct fib_nh *nh;
int ret;
@ -2152,8 +2152,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
head = fib_info_devhash_bucket(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {