8f0db01800
This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the bucket pointer to lock the hash chain for that bucket. The benefits of a bit spin_lock are: - no need to allocate a separate array of locks. - no need to have a configuration option to guide the choice of the size of this array - locking cost is often a single test-and-set in a cache line that will have to be loaded anyway. When inserting at, or removing from, the head of the chain, the unlock is free - writing the new address in the bucket head implicitly clears the lock bit. For __rhashtable_insert_fast() we ensure this always happens when adding a new key. - even when locking costs 2 updates (lock and unlock), they are in a cacheline that needs to be read anyway. The cost of using a bit spin_lock is a little bit of code complexity, which I think is quite manageable. Bit spin_locks are sometimes inappropriate because they are not fair - if multiple CPUs repeatedly contend for the same lock, one CPU can easily be starved. This is not a credible situation with rhashtable. Multiple CPUs may want to repeatedly add or remove objects, but they will typically do so at different buckets, so they will attempt to acquire different locks. As we have more bit-locks than we previously had spinlocks (by at least a factor of two) we can expect slightly less contention to go with the slightly better cache behavior and reduced memory consumption. To enhance type checking, a new struct is introduced to represent the pointer plus lock-bit that is stored in the bucket-table. This is "struct rhash_lock_head" and is empty. A pointer to this needs to be cast to either an unsigned long, or a "struct rhash_head *" to be useful. Variables of this type are most often called "bkt". Previously "pprev" would sometimes point to a bucket, and sometimes a ->next pointer in an rhash_head. As these are now different types, pprev is NULL when it would have pointed to the bucket. 
In that case, 'bkt' is used, together with correct locking protocol. Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: David S. Miller <davem@davemloft.net>
136 lines
3.5 KiB
C
136 lines
3.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Resizable, Scalable, Concurrent Hash Table
|
|
*
|
|
* Simple structures that might be needed in include
|
|
* files.
|
|
*/
|
|
|
|
#ifndef _LINUX_RHASHTABLE_TYPES_H
|
|
#define _LINUX_RHASHTABLE_TYPES_H
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
struct rhash_head {
|
|
struct rhash_head __rcu *next;
|
|
};
|
|
|
|
struct rhlist_head {
|
|
struct rhash_head rhead;
|
|
struct rhlist_head __rcu *next;
|
|
};
|
|
|
|
/* Forward declaration only: this header refers to it solely via pointers. */
struct bucket_table;
|
|
|
|
/**
 * struct rhashtable_compare_arg - Key for the function rhashtable_compare
 * @ht: Hash table
 * @key: Key to compare against
 *
 * A pointer to this struct is also the first argument of rht_obj_cmpfn_t
 * callbacks.
 */
struct rhashtable_compare_arg {
	struct rhashtable	*ht;
	const void		*key;
};
|
|
|
|
/*
 * Callback types stored in struct rhashtable_params.
 *
 * rht_hashfn_t and rht_obj_hashfn_t share one signature: a data pointer,
 * a length and a seed go in, a u32 comes out.
 *
 * rht_obj_cmpfn_t receives the compare argument (table + key) and an object.
 * NOTE(review): the meaning of its int result is not visible in this
 * header - confirm against the callers.
 */
typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
			       const void *obj);
|
|
|
|
/**
|
|
* struct rhashtable_params - Hash table construction parameters
|
|
* @nelem_hint: Hint on number of elements, should be 75% of desired size
|
|
* @key_len: Length of key
|
|
* @key_offset: Offset of key in struct to be hashed
|
|
* @head_offset: Offset of rhash_head in struct to be hashed
|
|
* @max_size: Maximum size while expanding
|
|
* @min_size: Minimum size while shrinking
|
|
* @automatic_shrinking: Enable automatic shrinking of tables
|
|
* @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
|
|
* @obj_hashfn: Function to hash object
|
|
* @obj_cmpfn: Function to compare key with object
|
|
*/
|
|
struct rhashtable_params {
|
|
u16 nelem_hint;
|
|
u16 key_len;
|
|
u16 key_offset;
|
|
u16 head_offset;
|
|
unsigned int max_size;
|
|
u16 min_size;
|
|
bool automatic_shrinking;
|
|
rht_hashfn_t hashfn;
|
|
rht_obj_hashfn_t obj_hashfn;
|
|
rht_obj_cmpfn_t obj_cmpfn;
|
|
};
|
|
|
|
/**
|
|
* struct rhashtable - Hash table handle
|
|
* @tbl: Bucket table
|
|
* @key_len: Key length for hashfn
|
|
* @max_elems: Maximum number of elements in table
|
|
* @p: Configuration parameters
|
|
* @rhlist: True if this is an rhltable
|
|
* @run_work: Deferred worker to expand/shrink asynchronously
|
|
* @mutex: Mutex to protect current/future table swapping
|
|
* @lock: Spin lock to protect walker list
|
|
* @nelems: Number of elements in table
|
|
*/
|
|
struct rhashtable {
|
|
struct bucket_table __rcu *tbl;
|
|
unsigned int key_len;
|
|
unsigned int max_elems;
|
|
struct rhashtable_params p;
|
|
bool rhlist;
|
|
struct work_struct run_work;
|
|
struct mutex mutex;
|
|
spinlock_t lock;
|
|
atomic_t nelems;
|
|
};
|
|
|
|
/**
|
|
* struct rhltable - Hash table with duplicate objects in a list
|
|
* @ht: Underlying rhtable
|
|
*/
|
|
struct rhltable {
|
|
struct rhashtable ht;
|
|
};
|
|
|
|
/**
|
|
* struct rhashtable_walker - Hash table walker
|
|
* @list: List entry on list of walkers
|
|
* @tbl: The table that we were walking over
|
|
*/
|
|
struct rhashtable_walker {
|
|
struct list_head list;
|
|
struct bucket_table *tbl;
|
|
};
|
|
|
|
/**
|
|
* struct rhashtable_iter - Hash table iterator
|
|
* @ht: Table to iterate through
|
|
* @p: Current pointer
|
|
* @list: Current hash list pointer
|
|
* @walker: Associated rhashtable walker
|
|
* @slot: Current slot
|
|
* @skip: Number of entries to skip in slot
|
|
*/
|
|
struct rhashtable_iter {
|
|
struct rhashtable *ht;
|
|
struct rhash_head *p;
|
|
struct rhlist_head *list;
|
|
struct rhashtable_walker walker;
|
|
unsigned int slot;
|
|
unsigned int skip;
|
|
bool end_of_table;
|
|
};
|
|
|
|
/*
 * Initialisers, implemented outside this header. Each takes the table
 * handle plus a read-only parameter block and returns an int.
 * NOTE(review): presumably 0 on success and a negative errno on failure -
 * confirm against the definitions.
 */
int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params);
int rhltable_init(struct rhltable *hlt,
		  const struct rhashtable_params *params);
|
|
|
|
#endif /* _LINUX_RHASHTABLE_TYPES_H */
|