summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Johannes Weiner <hannes@cmpxchg.org> 2016-12-12 16:43:41 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-12-12 18:55:08 -0800
commit f7942430e40f14c6d2ca48a1875add509938c07d (patch)
tree 8c363206d1f107bd86513597a027184ff7ae544a /lib
parent b936887e8739d3fa83f87d899f68d136735d9816 (diff)
lib: radix-tree: native accounting of exceptional entries
The way the page cache is sneaking shadow entries of evicted pages into
the radix tree past the node entry accounting and tracking them manually
in the upper bits of node->count is fraught with problems.

These shadow entries are marked in the tree as exceptional entries, which
are a native concept to the radix tree. Maintain an explicit counter of
exceptional entries in the radix tree node. Subsequent patches will
switch shadow entry tracking over to that counter.

DAX and shmem are the other users of exceptional entries. Since slot
replacements that change the entry type from regular to exceptional must
now be accounted, introduce a __radix_tree_replace() function that does
replacement and accounting, and switch DAX and shmem over.

The increase in radix tree node size is temporary. A followup patch
switches the shadow tracking to this new scheme and we'll no longer need
the upper bits in node->count and shrink that back to one byte.

Link: http://lkml.kernel.org/r/20161117192945.GA23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib')
-rw-r--r-- lib/radix-tree.c 46
1 files changed, 43 insertions, 3 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e6d552c40dd..7885796d35ae 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -220,10 +220,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
{
unsigned long i;
- pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n",
+ pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d exceptional %d parent %p\n",
node, node->offset,
node->tags[0][0], node->tags[1][0], node->tags[2][0],
- node->shift, node->count, node->parent);
+ node->shift, node->count, node->exceptional, node->parent);
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
unsigned long first = index | (i << node->shift);
@@ -522,8 +522,13 @@ static int radix_tree_extend(struct radix_tree_root *root,
node->offset = 0;
node->count = 1;
node->parent = NULL;
- if (radix_tree_is_internal_node(slot))
+ if (radix_tree_is_internal_node(slot)) {
entry_to_node(slot)->parent = node;
+ } else {
+ /* Moving an exceptional root->rnode to a node */
+ if (radix_tree_exceptional_entry(slot))
+ node->exceptional = 1;
+ }
node->slots[0] = slot;
slot = node_to_entry(node);
rcu_assign_pointer(root->rnode, slot);
@@ -649,6 +654,8 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
if (node) {
unsigned offset = get_slot_offset(node, slot);
node->count++;
+ if (radix_tree_exceptional_entry(item))
+ node->exceptional++;
BUG_ON(tag_get(node, 0, offset));
BUG_ON(tag_get(node, 1, offset));
BUG_ON(tag_get(node, 2, offset));
@@ -747,6 +754,37 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
EXPORT_SYMBOL(radix_tree_lookup);
/**
+ * __radix_tree_replace - replace item in a slot and adjust node->exceptional
+ * @root: radix tree root
+ * @node: pointer to tree node; may be NULL, then no node counter is updated
+ * @slot: pointer to slot in @node
+ * @item: new item to store in the slot; warns once if it is an internal node
+ *
+ * For use with __radix_tree_lookup(). Caller must hold tree write locked
+ * across slot lookup and replacement.
+ */
+void __radix_tree_replace(struct radix_tree_root *root,
+ struct radix_tree_node *node,
+ void **slot, void *item)
+{
+ void *old = rcu_dereference_raw(*slot);
+ int exceptional;
+
+ WARN_ON_ONCE(radix_tree_is_internal_node(item));
+ WARN_ON_ONCE(!!item - !!old); /* fires when exactly one of old/new is NULL */
+ /* Net change in exceptional entries for this slot: -1, 0 or +1 */
+ exceptional = !!radix_tree_exceptional_entry(item) -
+ !!radix_tree_exceptional_entry(old);
+ /* Without a node, an accounting change is only tolerated at root->rnode */
+ WARN_ON_ONCE(exceptional && !node && slot != (void **)&root->rnode);
+
+ if (node)
+ node->exceptional += exceptional;
+
+ rcu_assign_pointer(*slot, item);
+}
+
+/**
* radix_tree_tag_set - set a tag on a radix tree node
* @root: radix tree root
* @index: index key
@@ -1561,6 +1599,8 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
delete_sibling_entries(node, node_to_entry(slot), offset);
node->slots[offset] = NULL;
node->count--;
+ if (radix_tree_exceptional_entry(entry))
+ node->exceptional--;
__radix_tree_delete_node(root, node);