diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-23 10:05:41 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-23 10:05:41 -0700 |
commit | b3f391fddf3cfaadda59ec8da8fd17f4520bbf42 (patch) | |
tree | 42a950e8661c3f4a3ec2754190323a56a94f71c6 /lib | |
parent | f8ffbc365f703d74ecca8ca787318d05bbee2bf7 (diff) | |
parent | 025c55a4c7f11ea38521c6e797f3192ad8768c93 (diff) |
Merge tag 'bcachefs-2024-09-21' of git://evilpiepirate.org/bcachefs
Pull bcachefs updates from Kent Overstreet:
- rcu_pending, btree key cache rework: this solves lock contenting in
the key cache, eliminating the biggest source of the srcu lock hold
time warnings, and drastically improving performance on some metadata
heavy workloads - on multithreaded creates we're now 3-4x faster than
xfs.
- We're now using an rhashtable instead of the system inode hash table;
this is another significant performance improvement on multithreaded
metadata workloads, eliminating more lock contention.
- for_each_btree_key_in_subvolume_upto(): new helper for iterating over
keys within a specific subvolume, eliminating a lot of open coded
"subvolume_get_snapshot()" and also fixing another source of srcu
lock time warnings, by running each loop iteration in its own
transaction (as the existing for_each_btree_key() does).
- More work on btree_trans locking asserts; we now assert that we don't
hold btree node locks when trans->locked is false, which is important
because we don't use lockdep for tracking individual btree node
locks.
- Some cleanups and improvements in the bset.c btree node lookup code,
from Alan.
- Rework of btree node pinning, which we use in backpointers fsck. The
old hacky implementation, where the shrinker just skipped over nodes
in the pinned range, was causing OOMs; instead we now use another
shrinker with a much higher seeks number for pinned nodes.
- Rebalance now uses BCH_WRITE_ONLY_SPECIFIED_DEVS; this fixes an issue
where rebalance would sometimes fall back to allocating from the full
filesystem, which is not what we want when it's trying to move data
to a specific target.
- Use __GFP_ACCOUNT, GFP_RECLAIMABLE for btree node, key cache
allocations.
- Idmap mounts are now supported (Hongbo Li)
- Rename whiteouts are now supported (Hongbo Li)
- Erasure coding can now handle devices being marked as failed, or
forcibly removed. We still need the evacuate path for erasure coding,
but it's getting very close to ready for people to start using.
* tag 'bcachefs-2024-09-21' of git://evilpiepirate.org/bcachefs: (99 commits)
bcachefs: return err ptr instead of null in read sb clean
bcachefs: Remove duplicated include in backpointers.c
bcachefs: Don't drop devices with stripe pointers
bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices
bcachefs: bch_fs.rw_devs_change_count
bcachefs: bch2_dev_remove_stripes()
bcachefs: bch2_trigger_ptr() calculates sectors even when no device
bcachefs: improve error messages in bch2_ec_read_extent()
bcachefs: improve error message on too few devices for ec
bcachefs: improve bch2_new_stripe_to_text()
bcachefs: ec_stripe_head.nr_created
bcachefs: bch_stripe.disk_label
bcachefs: stripe_to_mem()
bcachefs: EIO errcode cleanup
bcachefs: Rework btree node pinning
bcachefs: split up btree cache counters for live, freeable
bcachefs: btree cache counters should be size_t
bcachefs: Don't count "skipped access bit" as touched in btree cache scan
bcachefs: Failed devices no longer require mounting in degraded mode
bcachefs: bch2_dev_rcu_noerror()
...
Diffstat (limited to 'lib')
-rw-r--r-- | lib/generic-radix-tree.c | 80 |
1 files changed, 6 insertions, 74 deletions
diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index fa692c86f069..79e067b51488 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -5,99 +5,31 @@ #include <linux/gfp.h> #include <linux/kmemleak.h> -#define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *)) -#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) - -struct genradix_node { - union { - /* Interior node: */ - struct genradix_node *children[GENRADIX_ARY]; - - /* Leaf: */ - u8 data[GENRADIX_NODE_SIZE]; - }; -}; - -static inline int genradix_depth_shift(unsigned depth) -{ - return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth; -} - -/* - * Returns size (of data, in bytes) that a tree of a given depth holds: - */ -static inline size_t genradix_depth_size(unsigned depth) -{ - return 1UL << genradix_depth_shift(depth); -} - -/* depth that's needed for a genradix that can address up to ULONG_MAX: */ -#define GENRADIX_MAX_DEPTH \ - DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT) - -#define GENRADIX_DEPTH_MASK \ - ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) - -static inline unsigned genradix_root_to_depth(struct genradix_root *r) -{ - return (unsigned long) r & GENRADIX_DEPTH_MASK; -} - -static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r) -{ - return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); -} - /* * Returns pointer to the specified byte @offset within @radix, or NULL if not * allocated */ void *__genradix_ptr(struct __genradix *radix, size_t offset) { - struct genradix_root *r = READ_ONCE(radix->root); - struct genradix_node *n = genradix_root_to_node(r); - unsigned level = genradix_root_to_depth(r); - - if (ilog2(offset) >= genradix_depth_shift(level)) - return NULL; - - while (1) { - if (!n) - return NULL; - if (!level) - break; - - level--; - - n = n->children[offset >> genradix_depth_shift(level)]; - offset &= genradix_depth_size(level) - 1; - } - - return &n->data[offset]; + return __genradix_ptr_inlined(radix, offset); } EXPORT_SYMBOL(__genradix_ptr); -static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) -{ - return kzalloc(GENRADIX_NODE_SIZE, gfp_mask); -} - -static inline void genradix_free_node(struct genradix_node *node) -{ - kfree(node); -} - /* * Returns pointer to the specified byte @offset within @radix, allocating it if * necessary - newly allocated slots are always zeroed out: */ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, + struct genradix_node **preallocated, gfp_t gfp_mask) { struct genradix_root *v = READ_ONCE(radix->root); struct genradix_node *n, *new_node = NULL; unsigned level; + if (preallocated) + swap(new_node, *preallocated); + /* Increase tree depth if necessary: */ while (1) { struct genradix_root *r = v, *new_root; @@ -281,7 +213,7 @@ int __genradix_prealloc(struct __genradix *radix, size_t size, size_t offset; for (offset = 0; offset < size; offset += GENRADIX_NODE_SIZE) - if (!__genradix_ptr_alloc(radix, offset, gfp_mask)) + if (!__genradix_ptr_alloc(radix, offset, NULL, gfp_mask)) return -ENOMEM; return 0; |