diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 89d102270570..f9ecf32bce55 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -102,8 +103,9 @@ struct rhashtable; * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker + * @insecure_elasticity: Set to true to disable chain length checks * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) - * @hashfn: Function to hash key + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object */ @@ -115,6 +117,7 @@ struct rhashtable_params { unsigned int max_size; unsigned int min_size; u32 nulls_base; + bool insecure_elasticity; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -125,6 +128,8 @@ struct rhashtable_params { * struct rhashtable - Hash table handle * @tbl: Bucket table * @nelems: Number of elements in table + * @key_len: Key length for hashfn + * @elasticity: Maximum chain length before rehash * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -134,6 +139,8 @@ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; bool being_destroyed; + unsigned int key_len; + unsigned int elasticity; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; @@ -199,9 +206,31 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { - return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?: - ht->p.key_len, - tbl->hash_rnd)); + unsigned hash; + + /* params must be equal to ht->p if it isn't constant. */ + if (!__builtin_constant_p(params.key_len)) + hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); + else if (params.key_len) { + unsigned key_len = params.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else if (key_len & (sizeof(u32) - 1)) + hash = jhash(key, key_len, tbl->hash_rnd); + else + hash = jhash2(key, key_len / sizeof(u32), + tbl->hash_rnd); + } else { + unsigned key_len = ht->p.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else + hash = jhash(key, key_len, tbl->hash_rnd); + } + + return rht_bucket_index(tbl, hash); } static inline unsigned int rht_head_hashfn( @@ -241,6 +270,17 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht, tbl->size > ht->p.min_size; } +/** + * rht_grow_above_100 - returns true if nelems > table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_100(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return atomic_read(&ht->nelems) > tbl->size; +} + /* The bucket lock is selected based on the hash and protects mutations * on a group of hash buckets. * @@ -282,9 +322,7 @@ int rhashtable_init(struct rhashtable *ht, int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); - -int rhashtable_expand(struct rhashtable *ht); -int rhashtable_shrink(struct rhashtable *ht); +int rhashtable_insert_rehash(struct rhashtable *ht); int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); @@ -507,43 +545,64 @@ static inline int __rhashtable_insert_fast( .ht = ht, .key = key, }; - int err = -EEXIST; struct bucket_table *tbl, *new_tbl; struct rhash_head *head; spinlock_t *lock; + unsigned elasticity; unsigned hash; + int err; +restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - /* Because we have already taken the bucket lock in tbl, - * if we find that future_tbl is not yet visible then - * that guarantees all other insertions of the same entry - * will also grab the bucket lock in tbl because until - * the rehash completes ht->tbl won't be changed. + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { err = rhashtable_insert_slow(ht, key, obj, new_tbl); + if (err == -EAGAIN) + goto slow_path; goto out; } - if (!key) - goto skip_lookup; + if (unlikely(rht_grow_above_100(ht, tbl))) { +slow_path: + spin_unlock_bh(lock); + rcu_read_unlock(); + err = rhashtable_insert_rehash(ht); + if (err) + return err; + goto restart; + } + + err = -EEXIST; + elasticity = ht->elasticity; rht_for_each(head, tbl, hash) { - if (unlikely(!(params.obj_cmpfn ? + if (key && + unlikely(!(params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head))))) goto out; + if (!--elasticity) + goto slow_path; } -skip_lookup: err = 0; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 83cfedd6612a..7686c1e9934a 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -58,7 +58,8 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #endif -static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl) +static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, + gfp_t gfp) { unsigned int i, size; #if defined(CONFIG_PROVE_LOCKING) @@ -75,12 +76,13 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl) if (sizeof(spinlock_t) != 0) { #ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE) + if (size * sizeof(spinlock_t) > PAGE_SIZE && + gfp == GFP_KERNEL) tbl->locks = vmalloc(size * sizeof(spinlock_t)); else #endif tbl->locks = kmalloc_array(size, sizeof(spinlock_t), - GFP_KERNEL); + gfp); if (!tbl->locks) return -ENOMEM; for (i = 0; i < size; i++) @@ -105,23 +107,25 @@ static void bucket_table_free_rcu(struct rcu_head *head) } static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, - size_t nbuckets) + size_t nbuckets, + gfp_t gfp) { struct bucket_table *tbl = NULL; size_t size; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) - tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - if (tbl == NULL) + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) || + gfp != GFP_KERNEL) + tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY); + if (tbl == NULL && gfp == GFP_KERNEL) tbl = vzalloc(size); if (tbl == NULL) return NULL; tbl->size = nbuckets; - if (alloc_bucket_locks(ht, tbl) < 0) { + if (alloc_bucket_locks(ht, tbl, gfp) < 0) { bucket_table_free(tbl); return NULL; } @@ -136,11 +140,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return tbl; } +static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, + struct bucket_table *tbl) +{ + struct bucket_table *new_tbl; + + do { + new_tbl = tbl; + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } while (tbl); + + return new_tbl; +} + static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct bucket_table *new_tbl = - rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl; + struct bucket_table *new_tbl = rhashtable_last_table(ht, + rht_dereference_rcu(old_tbl->future_tbl, ht)); struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; int err = -ENOENT; struct rhash_head *head, *next, *entry; @@ -196,12 +213,18 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash) spin_unlock_bh(old_bucket_lock); } -static void rhashtable_rehash(struct rhashtable *ht, - struct bucket_table *new_tbl) +static int rhashtable_rehash_attach(struct rhashtable *ht, + struct bucket_table *old_tbl, + struct bucket_table *new_tbl) { - struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct rhashtable_walker *walker; - unsigned old_hash; + /* Protect future_tbl using the first bucket lock. */ + spin_lock_bh(old_tbl->locks); + + /* Did somebody beat us to it? */ + if (rcu_access_pointer(old_tbl->future_tbl)) { + spin_unlock_bh(old_tbl->locks); + return -EEXIST; + } /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. @@ -211,6 +234,22 @@ static void rhashtable_rehash(struct rhashtable *ht, /* Ensure the new table is visible to readers. */ smp_wmb(); + spin_unlock_bh(old_tbl->locks); + + return 0; +} + +static int rhashtable_rehash_table(struct rhashtable *ht) +{ + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl; + struct rhashtable_walker *walker; + unsigned old_hash; + + new_tbl = rht_dereference(old_tbl->future_tbl, ht); + if (!new_tbl) + return 0; + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) rhashtable_rehash_chain(ht, old_hash); @@ -225,6 +264,8 @@ static void rhashtable_rehash(struct rhashtable *ht, * remain. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + + return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } /** @@ -242,27 +283,32 @@ static void rhashtable_rehash(struct rhashtable *ht, * It is valid to have concurrent insertions and deletions protected by per * bucket locks or concurrent RCU protected lookups and traversals. */ -int rhashtable_expand(struct rhashtable *ht) +static int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + int err; ASSERT_RHT_MUTEX(ht); - new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); + old_tbl = rhashtable_last_table(ht, old_tbl); + + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; - rhashtable_rehash(ht, new_tbl); - return 0; + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); + + return err; } -EXPORT_SYMBOL_GPL(rhashtable_expand); /** * rhashtable_shrink - Shrink hash table while allowing concurrent lookups * @ht: the hash table to shrink * - * This function may only be called in a context where it is safe to call - * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * This function shrinks the hash table to fit, i.e., the smallest + * size would not cause it to expand right away automatically. * * The caller must ensure that no concurrent resizing occurs by holding * ht->mutex. @@ -273,25 +319,39 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); * It is valid to have concurrent insertions and deletions protected by per * bucket locks or concurrent RCU protected lookups and traversals. */ -int rhashtable_shrink(struct rhashtable *ht) +static int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); + int err; ASSERT_RHT_MUTEX(ht); - new_tbl = bucket_table_alloc(ht, old_tbl->size / 2); + if (size < ht->p.min_size) + size = ht->p.min_size; + + if (old_tbl->size <= size) + return 0; + + if (rht_dereference(old_tbl->future_tbl, ht)) + return -EEXIST; + + new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; - rhashtable_rehash(ht, new_tbl); - return 0; + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); + + return err; } -EXPORT_SYMBOL_GPL(rhashtable_shrink); static void rht_deferred_worker(struct work_struct *work) { struct rhashtable *ht; struct bucket_table *tbl; + int err = 0; ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); @@ -299,29 +359,92 @@ static void rht_deferred_worker(struct work_struct *work) goto unlock; tbl = rht_dereference(ht->tbl, ht); + tbl = rhashtable_last_table(ht, tbl); if (rht_grow_above_75(ht, tbl)) rhashtable_expand(ht); else if (rht_shrink_below_30(ht, tbl)) rhashtable_shrink(ht); + + err = rhashtable_rehash_table(ht); + unlock: mutex_unlock(&ht->mutex); + + if (err) + schedule_work(&ht->run_work); } +static bool rhashtable_check_elasticity(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned hash) +{ + unsigned elasticity = ht->elasticity; + struct rhash_head *head; + + rht_for_each(head, tbl, hash) + if (!--elasticity) + return true; + + return false; +} + +int rhashtable_insert_rehash(struct rhashtable *ht) +{ + struct bucket_table *old_tbl; + struct bucket_table *new_tbl; + struct bucket_table *tbl; + unsigned int size; + int err; + + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = rhashtable_last_table(ht, old_tbl); + + size = tbl->size; + + if (rht_grow_above_75(ht, tbl)) + size *= 2; + /* More than two rehashes (not resizes) detected. */ + else if (WARN_ON(old_tbl != tbl && old_tbl->size == size)) + return -EBUSY; + + new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); + if (new_tbl == NULL) + return -ENOMEM; + + err = rhashtable_rehash_attach(ht, tbl, new_tbl); + if (err) { + bucket_table_free(new_tbl); + if (err == -EEXIST) + err = 0; + } else + schedule_work(&ht->run_work); + + return err; +} +EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); + int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *tbl) { struct rhash_head *head; unsigned hash; - int err = -EEXIST; + int err; + tbl = rhashtable_last_table(ht, tbl); hash = head_hashfn(ht, tbl, obj); spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + err = -EEXIST; if (key && rhashtable_lookup_fast(ht, key, ht->p)) goto exit; + err = -EAGAIN; + if (rhashtable_check_elasticity(ht, tbl, hash) || + rht_grow_above_100(ht, tbl)) + goto exit; + err = 0; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); @@ -477,6 +600,9 @@ next: iter->skip = 0; } + /* Ensure we see any new tables. */ + smp_rmb(); + iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (iter->walker->tbl) { iter->slot = 0; @@ -529,6 +655,11 @@ static size_t rounded_hashtable_size(const struct rhashtable_params *params) (unsigned long)params->min_size); } +static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) +{ + return jhash2(key, length, seed); +} + /** * rhashtable_init - initialize a new hash table * @ht: hash table to be initialized @@ -580,7 +711,7 @@ int rhashtable_init(struct rhashtable *ht, size = HASH_DEFAULT_SIZE; - if ((!(params->key_len && params->hashfn) && !params->obj_hashfn) || + if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; @@ -602,12 +733,25 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); + if (!params->insecure_elasticity) + ht->elasticity = 16; + if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); else ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; - tbl = bucket_table_alloc(ht, size); + ht->key_len = ht->p.key_len; + if (!params->hashfn) { + ht->p.hashfn = jhash; + + if (!(ht->key_len & (sizeof(u32) - 1))) { + ht->key_len /= sizeof(u32); + ht->p.hashfn = rhashtable_jhash2; + } + } + + tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (tbl == NULL) return -ENOMEM; diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index a2ba6adb60a2..a42a0d44e818 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -155,30 +155,6 @@ static int __init test_rhashtable(struct rhashtable *ht) test_rht_lookup(ht); rcu_read_unlock(); - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table expansion iteration %u...\n", i); - mutex_lock(&ht->mutex); - rhashtable_expand(ht); - mutex_unlock(&ht->mutex); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table shrinkage iteration %u...\n", i); - mutex_lock(&ht->mutex); - rhashtable_shrink(ht); - mutex_unlock(&ht->mutex); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - rcu_read_lock(); test_bucket_stats(ht, true); rcu_read_unlock(); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 651792141f07..e2f7f28148e0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3133,13 +3133,12 @@ static inline u32 netlink_hash(const void *data, u32 seed) struct netlink_compare_arg arg; netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid); - return jhash(&arg, netlink_compare_arg_len, seed); + return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed); } static const struct rhashtable_params netlink_rhashtable_params = { .head_offset = offsetof(struct netlink_sock, node), .key_len = netlink_compare_arg_len, - .hashfn = jhash, .obj_hashfn = netlink_hash, .obj_cmpfn = netlink_compare, .max_size = 65536, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 73c2f518a7c0..6dd5bd95236a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,7 +35,6 @@ */ #include -#include #include "core.h" #include "name_table.h" #include "node.h" @@ -2294,7 +2293,6 @@ static const struct rhashtable_params tsk_rht_params = { .head_offset = offsetof(struct tipc_sock, node), .key_offset = offsetof(struct tipc_sock, portid), .key_len = sizeof(u32), /* portid */ - .hashfn = jhash, .max_size = 1048576, .min_size = 256, };