From 3159f943aafdbacb2f94c38fdaadabf2bbde2a14 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Nov 2017 13:30:42 -0400 Subject: xarray: Replace exceptional entries Introduce xarray value entries and tagged pointers to replace radix tree exceptional entries. This is a slight change in encoding to allow the use of an extra bit (we can now store BITS_PER_LONG - 1 bits in a value entry). It is also a change in emphasis; exceptional entries are intimidating and different. As the comment explains, you can choose to store values or pointers in the xarray and they are both first-class citizens. Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- mm/workingset.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'mm/workingset.c') diff --git a/mm/workingset.c b/mm/workingset.c index 4516dd790129..bb109b3afac2 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -155,8 +155,8 @@ * refault distance will immediately activate the refaulting page. */ -#define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ - NODES_SHIFT + \ +#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ + NODES_SHIFT + \ MEM_CGROUP_ID_SHIFT) #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) @@ -173,20 +173,19 @@ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction) { eviction >>= bucket_order; + eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; - eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); - return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); + return xa_mk_value(eviction); } static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, unsigned long *evictionp) { - unsigned long entry = (unsigned long)shadow; + unsigned long entry = xa_to_value(shadow); int memcgid, nid; - entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; nid = entry & ((1UL << NODES_SHIFT) - 1); entry >>= NODES_SHIFT; memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); @@ -453,7 +452,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, goto out_invalid; for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { if (node->slots[i]) { - if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i]))) + if (WARN_ON_ONCE(!xa_is_value(node->slots[i]))) goto out_invalid; if (WARN_ON_ONCE(!node->exceptional)) goto out_invalid; -- cgit v1.2.3 From 01959dfe771c6893365482ec78dc1d9cbbbe6de8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 9 Nov 2017 09:23:56 -0500 Subject: xarray: Define struct xa_node This is a direct replacement for struct radix_tree_node. A couple of struct members have changed name, so convert those. Use a #define so that radix tree users continue to work without change. Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- include/linux/radix-tree.h | 29 +++------------------ include/linux/xarray.h | 27 ++++++++++++++++++++ lib/radix-tree.c | 48 +++++++++++++++++------------------ mm/workingset.c | 16 ++++++------ tools/testing/radix-tree/multiorder.c | 30 +++++++++++----------- 5 files changed, 77 insertions(+), 73 deletions(-) (limited to 'mm/workingset.c') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0b080bd88ab7..15388b7e38b9 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -32,6 +32,7 @@ /* Keep unconverted code working */ #define radix_tree_root xarray +#define radix_tree_node xa_node /* * The bottom two bits of the slot determine how the remaining bits in the @@ -60,41 +61,17 @@ static inline bool radix_tree_is_internal_node(void *ptr) /*** radix-tree API starts here ***/ -#define RADIX_TREE_MAX_TAGS 3 - #define RADIX_TREE_MAP_SHIFT XA_CHUNK_SHIFT #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) -#define RADIX_TREE_TAG_LONGS \ - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) +#define RADIX_TREE_MAX_TAGS XA_MAX_MARKS +#define RADIX_TREE_TAG_LONGS XA_MARK_LONGS #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) -/* - * @count is the count of every non-NULL element in the ->slots array - * whether that is a value entry, a retry entry, a user pointer, - * a sibling entry or a pointer to the next level of the tree. - * @exceptional is the count of every element in ->slots which is - * either a value entry or a sibling of a value entry. - */ -struct radix_tree_node { - unsigned char shift; /* Bits remaining in each slot */ - unsigned char offset; /* Slot offset in parent */ - unsigned char count; /* Total entry count */ - unsigned char exceptional; /* Exceptional entry count */ - struct radix_tree_node *parent; /* Used when ascending tree */ - struct radix_tree_root *root; /* The tree we belong to */ - union { - struct list_head private_list; /* For tree user */ - struct rcu_head rcu_head; /* Used when freeing node */ - }; - void __rcu *slots[RADIX_TREE_MAP_SIZE]; - unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; -}; - /* The IDR tag is stored in the low bits of xa_flags */ #define ROOT_IS_IDR ((__force gfp_t)4) /* The top bits of xa_flags are used to store the root tags */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 9122cf8bf52a..52141dfc5a90 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -252,6 +252,33 @@ static inline void xa_init(struct xarray *xa) #endif #define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) #define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) +#define XA_MAX_MARKS 3 +#define XA_MARK_LONGS DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG) + +/* + * @count is the count of every non-NULL element in the ->slots array + * whether that is a value entry, a retry entry, a user pointer, + * a sibling entry or a pointer to the next level of the tree. + * @nr_values is the count of every element in ->slots which is + * either a value entry or a sibling of a value entry. + */ +struct xa_node { + unsigned char shift; /* Bits remaining in each slot */ + unsigned char offset; /* Slot offset in parent */ + unsigned char count; /* Total entry count */ + unsigned char nr_values; /* Value entry count */ + struct xa_node __rcu *parent; /* NULL at top of tree */ + struct xarray *array; /* The array we belong to */ + union { + struct list_head private_list; /* For tree user */ + struct rcu_head rcu_head; /* Used when freeing node */ + }; + void __rcu *slots[XA_CHUNK_SIZE]; + union { + unsigned long tags[XA_MAX_MARKS][XA_MARK_LONGS]; + unsigned long marks[XA_MAX_MARKS][XA_MARK_LONGS]; + }; +}; /* Private */ static inline bool xa_is_node(const void *entry) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 299d4bdba109..8a568cea1237 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -260,11 +260,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) { unsigned long i; - pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n", + pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d nr_values %d\n", node, node->offset, index, index | node_maxindex(node), node->parent, node->tags[0][0], node->tags[1][0], node->tags[2][0], - node->shift, node->count, node->exceptional); + node->shift, node->count, node->nr_values); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { unsigned long first = index | (i << node->shift); @@ -354,7 +354,7 @@ static struct radix_tree_node * radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, struct radix_tree_root *root, unsigned int shift, unsigned int offset, - unsigned int count, unsigned int exceptional) + unsigned int count, unsigned int nr_values) { struct radix_tree_node *ret = NULL; @@ -401,9 +401,9 @@ out: ret->shift = shift; ret->offset = offset; ret->count = count; - ret->exceptional = exceptional; + ret->nr_values = nr_values; ret->parent = parent; - ret->root = root; + ret->array = root; } return ret; } @@ -633,8 +633,8 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; } else if (xa_is_value(entry)) { - /* Moving an exceptional root->xa_head to a node */ - node->exceptional = 1; + /* Moving a value entry root->xa_head to a node */ + node->nr_values = 1; } /* * entry was already in the radix tree, so we do not need @@ -928,12 +928,12 @@ static inline int insert_entries(struct radix_tree_node *node, if (xa_is_node(old)) radix_tree_free_nodes(old); if (xa_is_value(old)) - node->exceptional--; + node->nr_values--; } if (node) { node->count += n; if (xa_is_value(item)) - node->exceptional += n; + node->nr_values += n; } return n; } @@ -947,7 +947,7 @@ static inline int insert_entries(struct radix_tree_node *node, if (node) { node->count++; if (xa_is_value(item)) - node->exceptional++; + node->nr_values++; } return 1; } @@ -1083,7 +1083,7 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) EXPORT_SYMBOL(radix_tree_lookup); static inline void replace_sibling_entries(struct radix_tree_node *node, - void __rcu **slot, int count, int exceptional) + void __rcu **slot, int count, int values) { #ifdef CONFIG_RADIX_TREE_MULTIORDER unsigned offset = get_slot_offset(node, slot); @@ -1096,18 +1096,18 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, node->slots[offset] = NULL; node->count--; } - node->exceptional += exceptional; + node->nr_values += values; } #endif } static void replace_slot(void __rcu **slot, void *item, - struct radix_tree_node *node, int count, int exceptional) + struct radix_tree_node *node, int count, int values) { - if (node && (count || exceptional)) { + if (node && (count || values)) { node->count += count; - node->exceptional += exceptional; - replace_sibling_entries(node, slot, count, exceptional); + node->nr_values += values; + replace_sibling_entries(node, slot, count, values); } rcu_assign_pointer(*slot, item); @@ -1161,17 +1161,17 @@ void __radix_tree_replace(struct radix_tree_root *root, radix_tree_update_node_t update_node) { void *old = rcu_dereference_raw(*slot); - int exceptional = !!xa_is_value(item) - !!xa_is_value(old); + int values = !!xa_is_value(item) - !!xa_is_value(old); int count = calculate_count(root, node, slot, item, old); /* - * This function supports replacing exceptional entries and + * This function supports replacing value entries and * deleting entries, but that needs accounting against the * node unless the slot is root->xa_head. */ WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && - (count || exceptional)); - replace_slot(slot, item, node, count, exceptional); + (count || values)); + replace_slot(slot, item, node, count, values); if (!node) return; @@ -1193,7 +1193,7 @@ void __radix_tree_replace(struct radix_tree_root *root, * across slot lookup and replacement. * * NOTE: This cannot be used to switch between non-entries (empty slots), - * regular entries, and exceptional entries, as that requires accounting + * regular entries, and value entries, as that requires accounting * inside the radix tree node. When switching from one type of entry or * deleting, use __radix_tree_lookup() and __radix_tree_replace() or * radix_tree_iter_replace(). @@ -1301,7 +1301,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); } rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); - parent->exceptional -= (end - offset); + parent->nr_values -= (end - offset); if (order == parent->shift) return 0; @@ -1961,7 +1961,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { void *old = rcu_dereference_raw(*slot); - int exceptional = xa_is_value(old) ? -1 : 0; + int values = xa_is_value(old) ? -1 : 0; unsigned offset = get_slot_offset(node, slot); int tag; @@ -1971,7 +1971,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) node_tag_clear(root, node, tag, offset); - replace_slot(slot, NULL, node, -1, exceptional); + replace_slot(slot, NULL, node, -1, values); return node && delete_node(root, node, NULL); } diff --git a/mm/workingset.c b/mm/workingset.c index bb109b3afac2..c201cbb8c00f 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -349,7 +349,7 @@ void workingset_update_node(struct radix_tree_node *node) * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ - if (node->count && node->count == node->exceptional) { + if (node->count && node->count == node->nr_values) { if (list_empty(&node->private_list)) list_lru_add(&shadow_nodes, &node->private_list); } else { @@ -428,8 +428,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * to reclaim, take the node off-LRU, and drop the lru_lock. */ - node = container_of(item, struct radix_tree_node, private_list); - mapping = container_of(node->root, struct address_space, i_pages); + node = container_of(item, struct xa_node, private_list); + mapping = container_of(node->array, struct address_space, i_pages); /* Coming from the list, invert the lock order */ if (!xa_trylock(&mapping->i_pages)) { @@ -446,25 +446,25 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. */ - if (WARN_ON_ONCE(!node->exceptional)) + if (WARN_ON_ONCE(!node->nr_values)) goto out_invalid; - if (WARN_ON_ONCE(node->count != node->exceptional)) + if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { if (node->slots[i]) { if (WARN_ON_ONCE(!xa_is_value(node->slots[i]))) goto out_invalid; - if (WARN_ON_ONCE(!node->exceptional)) + if (WARN_ON_ONCE(!node->nr_values)) goto out_invalid; if (WARN_ON_ONCE(!mapping->nrexceptional)) goto out_invalid; node->slots[i] = NULL; - node->exceptional--; + node->nr_values--; node->count--; mapping->nrexceptional--; } } - if (WARN_ON_ONCE(node->exceptional)) + if (WARN_ON_ONCE(node->nr_values)) goto out_invalid; inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); __radix_tree_delete_node(&mapping->i_pages, node, diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 080aea450430..60786fa55302 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -393,7 +393,7 @@ static void multiorder_join2(unsigned order1, unsigned order2) radix_tree_insert(&tree, 1 << order2, xa_mk_value(5)); item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); assert(item2 == xa_mk_value(5)); - assert(node->exceptional == 1); + assert(node->nr_values == 1); item2 = radix_tree_lookup(&tree, 0); free(item2); @@ -401,7 +401,7 @@ static void multiorder_join2(unsigned order1, unsigned order2) radix_tree_join(&tree, 0, order1, item1); item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); assert(item2 == item1); - assert(node->exceptional == 0); + assert(node->nr_values == 0); item_kill_tree(&tree); } @@ -409,7 +409,7 @@ static void multiorder_join2(unsigned order1, unsigned order2) * This test revealed an accounting bug for value entries at one point. * Nodes were being freed back into the pool with an elevated exception count * by radix_tree_join() and then radix_tree_split() was failing to zero the - * count of exceptional entries. + * count of value entries. */ static void multiorder_join3(unsigned int order) { @@ -433,7 +433,7 @@ static void multiorder_join3(unsigned int order) } __radix_tree_lookup(&tree, 0, &node, NULL); - assert(node->exceptional == node->count); + assert(node->nr_values == node->count); item_kill_tree(&tree); } @@ -520,7 +520,7 @@ static void __multiorder_split2(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == xa_mk_value(5)); - assert(node->exceptional > 0); + assert(node->nr_values > 0); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { @@ -530,7 +530,7 @@ static void __multiorder_split2(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item != xa_mk_value(5)); - assert(node->exceptional == 0); + assert(node->nr_values == 0); item_kill_tree(&tree); } @@ -547,7 +547,7 @@ static void __multiorder_split3(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == xa_mk_value(5)); - assert(node->exceptional > 0); + assert(node->nr_values > 0); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { @@ -556,7 +556,7 @@ static void __multiorder_split3(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == xa_mk_value(7)); - assert(node->exceptional > 0); + assert(node->nr_values > 0); item_kill_tree(&tree); @@ -564,7 +564,7 @@ static void __multiorder_split3(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == xa_mk_value(5)); - assert(node->exceptional > 0); + assert(node->nr_values > 0); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { @@ -577,13 +577,13 @@ static void __multiorder_split3(int old_order, int new_order) item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL); assert(item == xa_mk_value(7)); - assert(node->count == node->exceptional); + assert(node->count == node->nr_values); do { node = node->parent; if (!node) break; assert(node->count == 1); - assert(node->exceptional == 0); + assert(node->nr_values == 0); } while (1); item_kill_tree(&tree); @@ -611,15 +611,15 @@ static void multiorder_account(void) __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5)); __radix_tree_lookup(&tree, 0, &node, NULL); - assert(node->count == node->exceptional * 2); + assert(node->count == node->nr_values * 2); radix_tree_delete(&tree, 1 << 5); - assert(node->exceptional == 0); + assert(node->nr_values == 0); __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5)); __radix_tree_lookup(&tree, 1 << 5, &node, &slot); - assert(node->count == node->exceptional * 2); + assert(node->count == node->nr_values * 2); __radix_tree_replace(&tree, node, slot, NULL, NULL); - assert(node->exceptional == 0); + assert(node->nr_values == 0); item_kill_tree(&tree); } -- cgit v1.2.3 From a97e7904c0806309fd77103005bb7820c3f1c5e4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 24 Nov 2017 14:24:59 -0500 Subject: mm: Convert workingset to XArray We construct an XA_STATE and use it to delete the node with xas_store() rather than adding a special function for this unique use case. Includes a test that simulates this usage for the test suite. Signed-off-by: Matthew Wilcox --- include/linux/swap.h | 9 -------- lib/test_xarray.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/workingset.c | 51 +++++++++++++++++------------------------ 3 files changed, 86 insertions(+), 39 deletions(-) (limited to 'mm/workingset.c') diff --git a/include/linux/swap.h b/include/linux/swap.h index 6ea50cf41b4c..112cebcf0402 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -306,15 +306,6 @@ void workingset_update_node(struct xa_node *node); xas_set_update(xas, workingset_update_node); \ } while (0) -/* Returns workingset_update_node() if the mapping has shadow entries. */ -#define workingset_lookup_update(mapping) \ -({ \ - radix_tree_update_node_t __helper = workingset_update_node; \ - if (dax_mapping(mapping) || shmem_mapping(mapping)) \ - __helper = NULL; \ - __helper; \ -}) - /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index a752e6a37e6f..128c6489082f 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -863,6 +863,67 @@ static noinline void check_create_range(struct xarray *xa) check_create_range_3(); } +static LIST_HEAD(shadow_nodes); + +static void test_update_node(struct xa_node *node) +{ + if (node->count && node->count == node->nr_values) { + if (list_empty(&node->private_list)) + list_add(&shadow_nodes, &node->private_list); + } else { + if (!list_empty(&node->private_list)) + list_del_init(&node->private_list); + } +} + +static noinline void shadow_remove(struct xarray *xa) +{ + struct xa_node *node; + + xa_lock(xa); + while ((node = list_first_entry_or_null(&shadow_nodes, + struct xa_node, private_list))) { + XA_STATE(xas, node->array, 0); + XA_BUG_ON(xa, node->array != xa); + list_del_init(&node->private_list); + xas.xa_node = xa_parent_locked(node->array, node); + xas.xa_offset = node->offset; + xas.xa_shift = node->shift + XA_CHUNK_SHIFT; + xas_set_update(&xas, test_update_node); + xas_store(&xas, NULL); + } + xa_unlock(xa); +} + +static noinline void check_workingset(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + xas_set_update(&xas, test_update_node); + + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(0)); + xas_next(&xas); + xas_store(&xas, xa_mk_value(1)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + XA_BUG_ON(xa, list_empty(&shadow_nodes)); + + xas_lock(&xas); + xas_next(&xas); + xas_store(&xas, &xas); + XA_BUG_ON(xa, !list_empty(&shadow_nodes)); + + xas_store(&xas, xa_mk_value(2)); + xas_unlock(&xas); + XA_BUG_ON(xa, list_empty(&shadow_nodes)); + + shadow_remove(xa); + XA_BUG_ON(xa, !list_empty(&shadow_nodes)); + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -916,6 +977,10 @@ static int xarray_checks(void) check_create_range(&array); check_store_iter(&array); + check_workingset(&array, 0); + check_workingset(&array, 64); + check_workingset(&array, 4096); + printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; } diff --git a/mm/workingset.c b/mm/workingset.c index c201cbb8c00f..5cfb29ec3fd9 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -148,7 +148,7 @@ * and activations is maintained (node->inactive_age). * * On eviction, a snapshot of this counter (along with some bits to - * identify the node) is stored in the now empty page cache radix tree + * identify the node) is stored in the now empty page cache * slot of the evicted page. This is called a shadow entry. * * On cache misses for which there are shadow entries, an eligible @@ -162,7 +162,7 @@ /* * Eviction timestamps need to be able to cover the full range of - * actionable refaults. However, bits are tight in the radix tree + * actionable refaults. However, bits are tight in the xarray * entry, and after storing the identifier for the lruvec there might * not be enough left to represent every single actionable refault. In * that case, we have to sacrifice granularity for distance, and group @@ -339,7 +339,7 @@ out: static struct list_lru shadow_nodes; -void workingset_update_node(struct radix_tree_node *node) +void workingset_update_node(struct xa_node *node) { /* * Track non-empty nodes that contain only shadow entries; @@ -368,7 +368,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, nodes = list_lru_shrink_count(&shadow_nodes, sc); /* - * Approximate a reasonable limit for the radix tree nodes + * Approximate a reasonable limit for the nodes * containing shadow entries. We don't need to keep more * shadow entries than possible pages on the active list, * since refault distances bigger than that are dismissed. @@ -383,11 +383,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, * worst-case density of 1/8th. Below that, not all eligible * refaults can be detected anymore. * - * On 64-bit with 7 radix_tree_nodes per page and 64 slots + * On 64-bit with 7 xa_nodes per page and 64 slots * each, this will reclaim shadow entries when they consume * ~1.8% of available memory: * - * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE + * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE */ if (sc->memcg) { cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, @@ -396,7 +396,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) + node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE); } - max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3); + max_nodes = cache >> (XA_CHUNK_SHIFT - 3); if (!nodes) return SHRINK_EMPTY; @@ -409,11 +409,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, static enum lru_status shadow_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, - void *arg) + void *arg) __must_hold(lru_lock) { + struct xa_node *node = container_of(item, struct xa_node, private_list); + XA_STATE(xas, node->array, 0); struct address_space *mapping; - struct radix_tree_node *node; - unsigned int i; int ret; /* @@ -421,14 +421,13 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * the shadow node LRU under the i_pages lock and the * lru_lock. Because the page cache tree is emptied before * the inode can be destroyed, holding the lru_lock pins any - * address_space that has radix tree nodes on the LRU. + * address_space that has nodes on the LRU. * * We can then safely transition to the i_pages lock to * pin only the address_space of the particular node we want * to reclaim, take the node off-LRU, and drop the lru_lock. */ - node = container_of(item, struct xa_node, private_list); mapping = container_of(node->array, struct address_space, i_pages); /* Coming from the list, invert the lock order */ @@ -450,25 +449,17 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, goto out_invalid; if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[i]) { - if (WARN_ON_ONCE(!xa_is_value(node->slots[i]))) - goto out_invalid; - if (WARN_ON_ONCE(!node->nr_values)) - goto out_invalid; - if (WARN_ON_ONCE(!mapping->nrexceptional)) - goto out_invalid; - node->slots[i] = NULL; - node->nr_values--; - node->count--; - mapping->nrexceptional--; - } - } - if (WARN_ON_ONCE(node->nr_values)) - goto out_invalid; + mapping->nrexceptional -= node->nr_values; + xas.xa_node = xa_parent_locked(&mapping->i_pages, node); + xas.xa_offset = node->offset; + xas.xa_shift = node->shift + XA_CHUNK_SHIFT; + xas_set_update(&xas, workingset_update_node); + /* + * We could store a shadow entry here which was the minimum of the + * shadow entries we were tracking ... + */ + xas_store(&xas, NULL); inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); - __radix_tree_delete_node(&mapping->i_pages, node, - workingset_lookup_update(mapping)); out_invalid: xa_unlock_irq(&mapping->i_pages); -- cgit v1.2.3