mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
radix-tree: add radix_tree_join
This new function allows for the replacement of many smaller entries in the radix tree with one larger multiorder entry. An RCU walker may see a mixture of the smaller entries and the large entry during the same walk, but it will never see NULL for an index which was populated before the join.

Link: http://lkml.kernel.org/r/1480369871-5271-58-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
268f42de71
commit
175542f575
@ -335,6 +335,9 @@ static inline void radix_tree_preload_end(void)
|
|||||||
preempt_enable();
|
preempt_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int radix_tree_join(struct radix_tree_root *, unsigned long index,
|
||||||
|
unsigned new_order, void *);
|
||||||
|
|
||||||
#define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */
|
#define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */
|
||||||
#define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */
|
#define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */
|
||||||
#define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */
|
#define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */
|
||||||
|
191
lib/radix-tree.c
191
lib/radix-tree.c
@ -339,17 +339,14 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
|
|||||||
{
|
{
|
||||||
struct radix_tree_node *node =
|
struct radix_tree_node *node =
|
||||||
container_of(head, struct radix_tree_node, rcu_head);
|
container_of(head, struct radix_tree_node, rcu_head);
|
||||||
int i;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* must only free zeroed nodes into the slab. radix_tree_shrink
|
* Must only free zeroed nodes into the slab. We can be left with
|
||||||
* can leave us with a non-NULL entry in the first slot, so clear
|
* non-NULL entries by radix_tree_free_nodes, so clear the entries
|
||||||
* that here to make sure.
|
* and tags here.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
|
memset(node->slots, 0, sizeof(node->slots));
|
||||||
tag_clear(node, i, 0);
|
memset(node->tags, 0, sizeof(node->tags));
|
||||||
|
|
||||||
node->slots[0] = NULL;
|
|
||||||
INIT_LIST_HEAD(&node->private_list);
|
INIT_LIST_HEAD(&node->private_list);
|
||||||
|
|
||||||
kmem_cache_free(radix_tree_node_cachep, node);
|
kmem_cache_free(radix_tree_node_cachep, node);
|
||||||
@ -678,14 +675,14 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
|
|||||||
shift = radix_tree_load_root(root, &child, &maxindex);
|
shift = radix_tree_load_root(root, &child, &maxindex);
|
||||||
|
|
||||||
/* Make sure the tree is high enough. */
|
/* Make sure the tree is high enough. */
|
||||||
|
if (order > 0 && max == ((1UL << order) - 1))
|
||||||
|
max++;
|
||||||
if (max > maxindex) {
|
if (max > maxindex) {
|
||||||
int error = radix_tree_extend(root, max, shift);
|
int error = radix_tree_extend(root, max, shift);
|
||||||
if (error < 0)
|
if (error < 0)
|
||||||
return error;
|
return error;
|
||||||
shift = error;
|
shift = error;
|
||||||
child = root->rnode;
|
child = root->rnode;
|
||||||
if (order == shift)
|
|
||||||
shift += RADIX_TREE_MAP_SHIFT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while (shift > order) {
|
while (shift > order) {
|
||||||
@ -697,6 +694,8 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
|
|||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
child->shift = shift;
|
child->shift = shift;
|
||||||
child->offset = offset;
|
child->offset = offset;
|
||||||
|
child->count = 0;
|
||||||
|
child->exceptional = 0;
|
||||||
child->parent = node;
|
child->parent = node;
|
||||||
rcu_assign_pointer(*slot, node_to_entry(child));
|
rcu_assign_pointer(*slot, node_to_entry(child));
|
||||||
if (node)
|
if (node)
|
||||||
@ -710,25 +709,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
|
|||||||
slot = &node->slots[offset];
|
slot = &node->slots[offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_RADIX_TREE_MULTIORDER
|
|
||||||
/* Insert pointers to the canonical entry */
|
|
||||||
if (order > shift) {
|
|
||||||
unsigned i, n = 1 << (order - shift);
|
|
||||||
offset = offset & ~(n - 1);
|
|
||||||
slot = &node->slots[offset];
|
|
||||||
child = node_to_entry(slot);
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
if (slot[i])
|
|
||||||
return -EEXIST;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 1; i < n; i++) {
|
|
||||||
rcu_assign_pointer(slot[i], child);
|
|
||||||
node->count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (nodep)
|
if (nodep)
|
||||||
*nodep = node;
|
*nodep = node;
|
||||||
if (slotp)
|
if (slotp)
|
||||||
@ -736,6 +716,115 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_RADIX_TREE_MULTIORDER
|
||||||
|
/*
 * Free any nodes below this node.  The tree is presumed to not need
 * shrinking, and any user data in the tree is presumed to not need a
 * destructor called on it.  If we need to add a destructor, we can
 * add that functionality later.  Note that we may not clear tags or
 * slots from the tree as an RCU walker may still have a pointer into
 * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
 * but we'll still have to clear those in rcu_free.
 */
|
||||||
|
static void radix_tree_free_nodes(struct radix_tree_node *node)
|
||||||
|
{
|
||||||
|
unsigned offset = 0;
|
||||||
|
struct radix_tree_node *child = entry_to_node(node);
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
void *entry = child->slots[offset];
|
||||||
|
if (radix_tree_is_internal_node(entry) &&
|
||||||
|
!is_sibling_entry(child, entry)) {
|
||||||
|
child = entry_to_node(entry);
|
||||||
|
offset = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
offset++;
|
||||||
|
while (offset == RADIX_TREE_MAP_SIZE) {
|
||||||
|
struct radix_tree_node *old = child;
|
||||||
|
offset = child->offset + 1;
|
||||||
|
child = child->parent;
|
||||||
|
radix_tree_node_free(old);
|
||||||
|
if (old == entry_to_node(node))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int insert_entries(struct radix_tree_node *node, void **slot,
|
||||||
|
void *item, unsigned order, bool replace)
|
||||||
|
{
|
||||||
|
struct radix_tree_node *child;
|
||||||
|
unsigned i, n, tag, offset, tags = 0;
|
||||||
|
|
||||||
|
if (node) {
|
||||||
|
n = 1 << (order - node->shift);
|
||||||
|
offset = get_slot_offset(node, slot);
|
||||||
|
} else {
|
||||||
|
n = 1;
|
||||||
|
offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n > 1) {
|
||||||
|
offset = offset & ~(n - 1);
|
||||||
|
slot = &node->slots[offset];
|
||||||
|
}
|
||||||
|
child = node_to_entry(slot);
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
if (slot[i]) {
|
||||||
|
if (replace) {
|
||||||
|
node->count--;
|
||||||
|
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
|
||||||
|
if (tag_get(node, tag, offset + i))
|
||||||
|
tags |= 1 << tag;
|
||||||
|
} else
|
||||||
|
return -EEXIST;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
struct radix_tree_node *old = slot[i];
|
||||||
|
if (i) {
|
||||||
|
rcu_assign_pointer(slot[i], child);
|
||||||
|
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
|
||||||
|
if (tags & (1 << tag))
|
||||||
|
tag_clear(node, tag, offset + i);
|
||||||
|
} else {
|
||||||
|
rcu_assign_pointer(slot[i], item);
|
||||||
|
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
|
||||||
|
if (tags & (1 << tag))
|
||||||
|
tag_set(node, tag, offset);
|
||||||
|
}
|
||||||
|
if (radix_tree_is_internal_node(old) &&
|
||||||
|
!is_sibling_entry(node, old))
|
||||||
|
radix_tree_free_nodes(old);
|
||||||
|
if (radix_tree_exceptional_entry(old))
|
||||||
|
node->exceptional--;
|
||||||
|
}
|
||||||
|
if (node) {
|
||||||
|
node->count += n;
|
||||||
|
if (radix_tree_exceptional_entry(item))
|
||||||
|
node->exceptional += n;
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline int insert_entries(struct radix_tree_node *node, void **slot,
|
||||||
|
void *item, unsigned order, bool replace)
|
||||||
|
{
|
||||||
|
if (*slot)
|
||||||
|
return -EEXIST;
|
||||||
|
rcu_assign_pointer(*slot, item);
|
||||||
|
if (node) {
|
||||||
|
node->count++;
|
||||||
|
if (radix_tree_exceptional_entry(item))
|
||||||
|
node->exceptional++;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* __radix_tree_insert - insert into a radix tree
|
* __radix_tree_insert - insert into a radix tree
|
||||||
* @root: radix tree root
|
* @root: radix tree root
|
||||||
@ -757,15 +846,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
|
|||||||
error = __radix_tree_create(root, index, order, &node, &slot);
|
error = __radix_tree_create(root, index, order, &node, &slot);
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
if (*slot != NULL)
|
|
||||||
return -EEXIST;
|
error = insert_entries(node, slot, item, order, false);
|
||||||
rcu_assign_pointer(*slot, item);
|
if (error < 0)
|
||||||
|
return error;
|
||||||
|
|
||||||
if (node) {
|
if (node) {
|
||||||
unsigned offset = get_slot_offset(node, slot);
|
unsigned offset = get_slot_offset(node, slot);
|
||||||
node->count++;
|
|
||||||
if (radix_tree_exceptional_entry(item))
|
|
||||||
node->exceptional++;
|
|
||||||
BUG_ON(tag_get(node, 0, offset));
|
BUG_ON(tag_get(node, 0, offset));
|
||||||
BUG_ON(tag_get(node, 1, offset));
|
BUG_ON(tag_get(node, 1, offset));
|
||||||
BUG_ON(tag_get(node, 2, offset));
|
BUG_ON(tag_get(node, 2, offset));
|
||||||
@ -942,6 +1029,40 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
|
|||||||
replace_slot(root, NULL, slot, item, true);
|
replace_slot(root, NULL, slot, item, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_RADIX_TREE_MULTIORDER
|
||||||
|
/**
|
||||||
|
* radix_tree_join - replace multiple entries with one multiorder entry
|
||||||
|
* @root: radix tree root
|
||||||
|
* @index: an index inside the new entry
|
||||||
|
* @order: order of the new entry
|
||||||
|
* @item: new entry
|
||||||
|
*
|
||||||
|
* Call this function to replace several entries with one larger entry.
|
||||||
|
* The existing entries are presumed to not need freeing as a result of
|
||||||
|
* this call.
|
||||||
|
*
|
||||||
|
* The replacement entry will have all the tags set on it that were set
|
||||||
|
* on any of the entries it is replacing.
|
||||||
|
*/
|
||||||
|
int radix_tree_join(struct radix_tree_root *root, unsigned long index,
|
||||||
|
unsigned order, void *item)
|
||||||
|
{
|
||||||
|
struct radix_tree_node *node;
|
||||||
|
void **slot;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
BUG_ON(radix_tree_is_internal_node(item));
|
||||||
|
|
||||||
|
error = __radix_tree_create(root, index, order, &node, &slot);
|
||||||
|
if (!error)
|
||||||
|
error = insert_entries(node, slot, item, order, true);
|
||||||
|
if (error > 0)
|
||||||
|
error = 0;
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* radix_tree_tag_set - set a tag on a radix tree node
|
* radix_tree_tag_set - set a tag on a radix tree node
|
||||||
* @root: radix tree root
|
* @root: radix tree root
|
||||||
|
@ -332,6 +332,63 @@ void multiorder_tagged_iteration(void)
|
|||||||
item_kill_tree(&tree);
|
item_kill_tree(&tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void __multiorder_join(unsigned long index,
|
||||||
|
unsigned order1, unsigned order2)
|
||||||
|
{
|
||||||
|
unsigned long loc;
|
||||||
|
void *item, *item2 = item_create(index + 1, order1);
|
||||||
|
RADIX_TREE(tree, GFP_KERNEL);
|
||||||
|
|
||||||
|
item_insert_order(&tree, index, order2);
|
||||||
|
item = radix_tree_lookup(&tree, index);
|
||||||
|
radix_tree_join(&tree, index + 1, order1, item2);
|
||||||
|
loc = find_item(&tree, item);
|
||||||
|
if (loc == -1)
|
||||||
|
free(item);
|
||||||
|
item = radix_tree_lookup(&tree, index + 1);
|
||||||
|
assert(item == item2);
|
||||||
|
item_kill_tree(&tree);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __multiorder_join2(unsigned order1, unsigned order2)
|
||||||
|
{
|
||||||
|
RADIX_TREE(tree, GFP_KERNEL);
|
||||||
|
struct radix_tree_node *node;
|
||||||
|
void *item1 = item_create(0, order1);
|
||||||
|
void *item2;
|
||||||
|
|
||||||
|
item_insert_order(&tree, 0, order2);
|
||||||
|
radix_tree_insert(&tree, 1 << order2, (void *)0x12UL);
|
||||||
|
item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
|
||||||
|
assert(item2 == (void *)0x12UL);
|
||||||
|
assert(node->exceptional == 1);
|
||||||
|
|
||||||
|
radix_tree_join(&tree, 0, order1, item1);
|
||||||
|
item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
|
||||||
|
assert(item2 == item1);
|
||||||
|
assert(node->exceptional == 0);
|
||||||
|
item_kill_tree(&tree);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Drive both join tests over every pair of orders with
 * 0 <= small < big < 15.  The first test is additionally repeated for the
 * starting indices produced by idx = idx * 2 + 3 while idx < 1024.
 */
static void multiorder_join(void)
{
	int big, small, idx;

	idx = 0;
	while (idx < 1024) {
		for (big = 1; big < 15; big++)
			for (small = 0; small < big; small++)
				__multiorder_join(idx, big, small);
		idx = idx * 2 + 3;
	}

	for (big = 1; big < 15; big++)
		for (small = 0; small < big; small++)
			__multiorder_join2(big, small);
}
|
||||||
|
|
||||||
void multiorder_checks(void)
|
void multiorder_checks(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -349,4 +406,5 @@ void multiorder_checks(void)
|
|||||||
multiorder_tag_tests();
|
multiorder_tag_tests();
|
||||||
multiorder_iteration();
|
multiorder_iteration();
|
||||||
multiorder_tagged_iteration();
|
multiorder_tagged_iteration();
|
||||||
|
multiorder_join();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user