From ee526b88caaa4b4182144bf2576af2c3b1e9c759 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Oct 2023 14:46:58 -0400 Subject: closures: Fix race in closure_sync() As pointed out by Linus, closure_sync() was racy; we could skip blocking immediately after a get() and a put(), but then that would skip any barrier corresponding to the other thread's put() barrier. To fix this, always do the full __closure_sync() sequence whenever any get() has happened and the closure might have been used by other threads. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-direct.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 6a9557e7ecab..5b42a76c4796 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -113,6 +113,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) } else { atomic_set(&dio->cl.remaining, CLOSURE_REMAINING_INITIALIZER + 1); + dio->cl.closure_get_happened = true; } dio->req = req; -- cgit v1.2.3 From 20e425d301d673dbd5df0c9d4b186c70b43813bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 5 Feb 2023 16:18:59 -0500 Subject: six locks: Lock contended tracepoints Signed-off-by: Kent Overstreet --- fs/bcachefs/six.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index b684b9f00c1b..b775cf0fb7cb 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -11,6 +11,8 @@ #include #include +#include + #include "six.h" #ifdef DEBUG @@ -462,11 +464,12 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, smp_mb__after_atomic(); } + trace_contention_begin(lock, 0); + lock_contended(&lock->dep_map, ip); + if (six_optimistic_spin(lock, type)) goto out; - lock_contended(&lock->dep_map, ip); - wait->task = current; wait->lock_want = type; wait->lock_acquired = false; @@ -546,6 +549,7 @@ out: six_clear_bitmask(lock, 
SIX_LOCK_HELD_write); six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); } + trace_contention_end(lock, 0); return ret; } -- cgit v1.2.3 From 0dd092bf1091a114f22136e5776aec21e6e4af2a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 19 Oct 2023 15:23:56 -0400 Subject: bcachefs: Fix lock ordering with snapshot_create_lock We must not hold btree locks while taking snapshot_create_lock - this fixes a lockdep splat. Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 4982468bfe11..5a62d2e14cb4 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1447,6 +1447,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) } } + bch2_trans_unlock(trans); down_write(&c->snapshot_create_lock); for_each_btree_key(trans, iter, BTREE_ID_snapshots, -- cgit v1.2.3 From b0b5bbf99fc269e10d01c2a9873de5a042bdc7f5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 19 Oct 2023 21:25:04 -0400 Subject: bcachefs: Don't run bch2_delete_dead_snapshots() unnecessarily Be a bit more careful about when bch2_delete_dead_snapshots needs to run: it only needs to run synchronously if we're running fsck, and it only needs to run at all if we have snapshot nodes to delete or if fsck has noticed that it needs to run. Also: Rename BCH_FS_HAVE_DELETED_SNAPSHOTS -> BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS Kill bch2_delete_dead_snapshots_hook(), move functionality to bch2_mark_snapshot() Factor out bch2_check_snapshot_needs_deletion(), to explicitly check if we need to be running snapshot deletion. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 +- fs/bcachefs/fsck.c | 1 + fs/bcachefs/recovery.c | 2 +- fs/bcachefs/recovery_types.h | 2 +- fs/bcachefs/snapshot.c | 77 ++++++++++++++++++++++++++------------------ fs/bcachefs/snapshot.h | 2 -- fs/bcachefs/subvolume.c | 19 ++--------- 7 files changed, 51 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 53ffa88cae16..9863571feebf 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -578,7 +578,7 @@ enum { BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ BCH_FS_NEED_ANOTHER_GC, - BCH_FS_HAVE_DELETED_SNAPSHOTS, + BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, /* errors: */ BCH_FS_ERROR, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index b8f9e7475dc5..8d79fc8c690b 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -447,6 +447,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, bch2_btree_ids[btree_id], pos.inode, pos.offset, i->id, n.id, n.equiv); + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots); } } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 4cd660650e5b..1ad12ae38053 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -901,7 +901,7 @@ out: } kfree(clean); - if (!ret && test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) { + if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) { bch2_fs_read_write_early(c); bch2_delete_dead_snapshots_async(c); } diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index fbfa9d831d6f..4c1cea2a601d 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -27,7 +27,7 @@ x(check_snapshot_trees, PASS_FSCK) \ x(check_snapshots, PASS_FSCK) \ x(check_subvols, PASS_FSCK) \ - x(delete_dead_snapshots, PASS_FSCK|PASS_UNCLEAN) \ + x(delete_dead_snapshots, 
PASS_FSCK) \ x(fs_upgrade_for_subvolumes, 0) \ x(resume_logged_ops, PASS_ALWAYS) \ x(check_inodes, PASS_FSCK) \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 5a62d2e14cb4..315e88cc3867 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -325,8 +325,9 @@ int bch2_mark_snapshot(struct btree_trans *trans, __set_is_ancestor_bitmap(c, id); if (BCH_SNAPSHOT_DELETED(s.v)) { - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_snapshots); + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_delete_dead_snapshots) + bch2_delete_dead_snapshots_async(c); } } else { memset(t, 0, sizeof(*t)); @@ -1251,13 +1252,7 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans, return 0; } -/* - * For a given snapshot, if it doesn't have a subvolume that points to it, and - * it doesn't have child snapshot nodes - it's now redundant and we can mark it - * as deleted. - */ -static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k) +static int bch2_snapshot_needs_delete(struct btree_trans *trans, struct bkey_s_c k) { struct bkey_s_c_snapshot snap; u32 children[2]; @@ -1278,10 +1273,21 @@ static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btre bch2_snapshot_live(trans, children[1]); if (ret < 0) return ret; + return !ret; +} - if (!ret) - return bch2_snapshot_node_set_deleted(trans, k.k->p.offset); - return 0; +/* + * For a given snapshot, if it doesn't have a subvolume that points to it, and + * it doesn't have child snapshot nodes - it's now redundant and we can mark it + * as deleted. + */ +static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct bkey_s_c k) +{ + int ret = bch2_snapshot_needs_delete(trans, k); + + return ret <= 0 + ? 
ret + : bch2_snapshot_node_set_deleted(trans, k.k->p.offset); } static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n, @@ -1369,6 +1375,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) u32 *i, id; int ret = 0; + if (!test_and_clear_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) + return 0; + if (!test_bit(BCH_FS_STARTED, &c->flags)) { ret = bch2_fs_read_write_early(c); if (ret) { @@ -1386,7 +1395,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, NULL, NULL, 0, - bch2_delete_redundant_snapshot(trans, &iter, k)); + bch2_delete_redundant_snapshot(trans, k)); if (ret) { bch_err_msg(c, ret, "deleting redundant snapshots"); goto err; @@ -1492,8 +1501,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) goto err_create_lock; } } - - clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); err_create_lock: up_write(&c->snapshot_create_lock); err: @@ -1509,8 +1516,7 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); - if (test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) - bch2_delete_dead_snapshots(c); + bch2_delete_dead_snapshots(c); bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); } @@ -1521,20 +1527,6 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *c) bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); } -int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, - struct btree_trans_commit_hook *h) -{ - struct bch_fs *c = trans->c; - - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - - if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots) - return 0; - - bch2_delete_dead_snapshots_async(c); - return 0; -} - int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, enum btree_id id, struct bpos pos) @@ -1665,6 +1657,26 @@ again: return ret ?: trans_was_restarted(trans, restart_count); } +static int 
bch2_check_snapshot_needs_deletion(struct btree_trans *trans, struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c_snapshot snap; + int ret = 0; + + if (k.k->type != KEY_TYPE_snapshot) + return 0; + + snap = bkey_s_c_to_snapshot(k); + if (BCH_SNAPSHOT_DELETED(snap.v) || + bch2_snapshot_equiv(c, k.k->p.offset) != k.k->p.offset || + (ret = bch2_snapshot_needs_delete(trans, k)) > 0) { + set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); + return 0; + } + + return ret; +} + int bch2_snapshots_read(struct bch_fs *c) { struct btree_iter iter; @@ -1675,7 +1687,8 @@ int bch2_snapshots_read(struct bch_fs *c) for_each_btree_key2(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: - bch2_snapshot_set_equiv(trans, k)) ?: + bch2_snapshot_set_equiv(trans, k) ?: + bch2_check_snapshot_needs_deletion(trans, k)) ?: for_each_btree_key2(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index de215d9d1252..01f006cac831 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -244,8 +244,6 @@ int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *); int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); -int bch2_delete_dead_snapshots_hook(struct btree_trans *, - struct btree_trans_commit_hook *); void bch2_delete_dead_snapshots_work(struct work_struct *); int __bch2_key_has_snapshot_overwrites(struct btree_trans *, enum btree_id, struct bpos); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index caf2dd7dafff..73ba22c219a1 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -230,7 +230,6 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { struct btree_iter iter; struct bkey_s_c_subvolume subvol; - struct btree_trans_commit_hook *h; u32 snapid; int ret = 0; @@ -246,22 +245,8 @@ 
static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) snapid = le32_to_cpu(subvol.v->snapshot); - ret = bch2_btree_delete_at(trans, &iter, 0); - if (ret) - goto err; - - ret = bch2_snapshot_node_set_deleted(trans, snapid); - if (ret) - goto err; - - h = bch2_trans_kmalloc(trans, sizeof(*h)); - ret = PTR_ERR_OR_ZERO(h); - if (ret) - goto err; - - h->fn = bch2_delete_dead_snapshots_hook; - bch2_trans_commit_hook(trans, h); -err: + ret = bch2_btree_delete_at(trans, &iter, 0) ?: + bch2_snapshot_node_set_deleted(trans, snapid); bch2_trans_iter_exit(trans, &iter); return ret; } -- cgit v1.2.3 From 88dfe193bd2abd08926c1a0d48b770bb68ac8ccb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 19 Oct 2023 22:49:08 -0400 Subject: bcachefs: bch2_btree_id_str() Since we can run with unknown btree IDs, we can't directly index btree IDs into fixed size arrays. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 6 +++--- fs/bcachefs/backpointers.c | 4 ++-- fs/bcachefs/bbpos.h | 3 ++- fs/bcachefs/bkey_methods.c | 3 ++- fs/bcachefs/btree_cache.c | 21 +++++++++++++++++---- fs/bcachefs/btree_cache.h | 5 +++-- fs/bcachefs/btree_gc.c | 20 ++++++++++---------- fs/bcachefs/btree_io.c | 18 ++++-------------- fs/bcachefs/btree_iter.c | 14 +++++++------- fs/bcachefs/btree_key_cache.c | 4 ++-- fs/bcachefs/btree_trans_commit.c | 2 +- fs/bcachefs/debug.c | 8 ++++---- fs/bcachefs/fsck.c | 5 +++-- fs/bcachefs/journal_io.c | 2 +- fs/bcachefs/move.c | 2 +- fs/bcachefs/opts.c | 3 +-- fs/bcachefs/opts.h | 2 +- fs/bcachefs/recovery.c | 6 +++--- fs/bcachefs/sysfs.c | 2 +- fs/bcachefs/trace.h | 12 ++++++------ 20 files changed, 74 insertions(+), 68 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 2d516207e223..455ee0b47f31 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -727,7 +727,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, "incorrect key 
when %s %s:%llu:%llu:0 (got %s should be %s)\n" " for %s", set ? "setting" : "clearing", - bch2_btree_ids[btree], + bch2_btree_id_str(btree), iter.pos.inode, iter.pos.offset, bch2_bkey_types[old.k->type], @@ -1245,7 +1245,7 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c, "entry in %s btree for nonexistant dev:bucket %llu:%llu", - bch2_btree_ids[iter->btree_id], pos.inode, pos.offset)) + bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset)) goto delete; a = bch2_alloc_to_v4(alloc_k, &a_convert); @@ -1255,7 +1255,7 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr genbits != alloc_freespace_genbits(*a)), c, "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), - bch2_btree_ids[iter->btree_id], + bch2_btree_id_str(iter->btree_id), iter->pos.inode, iter->pos.offset, a->data_type == state, diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index cc856150a948..e74295c21a03 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -55,7 +55,7 @@ int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k, void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) { prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", - bch2_btree_ids[bp->btree_id], + bch2_btree_id_str(bp->btree_id), bp->level, (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), @@ -453,7 +453,7 @@ fsck_err: return ret; missing: prt_printf(&buf, "missing backpointer for btree=%s l=%u ", - bch2_btree_ids[bp.btree_id], bp.level); + bch2_btree_id_str(bp.btree_id), bp.level); bch2_bkey_val_to_text(&buf, c, orig_k); prt_printf(&buf, "\nbp pos "); bch2_bpos_to_text(&buf, bp_iter.pos); diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h index 
1fbed1f8378d..0038bc28ba8c 100644 --- a/fs/bcachefs/bbpos.h +++ b/fs/bcachefs/bbpos.h @@ -3,6 +3,7 @@ #define _BCACHEFS_BBPOS_H #include "bkey_methods.h" +#include "btree_cache.h" struct bbpos { enum btree_id btree; @@ -40,7 +41,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos) static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos) { - prt_str(out, bch2_btree_ids[pos.btree]); + prt_str(out, bch2_btree_id_str(pos.btree)); prt_char(out, ':'); bch2_bpos_to_text(out, pos.pos); } diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index d9fb1fc81f1e..d9711a27a71e 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "backpointers.h" #include "bkey_methods.h" +#include "btree_cache.h" #include "btree_types.h" #include "alloc_background.h" #include "dirent.h" @@ -164,7 +165,7 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, if (flags & BKEY_INVALID_COMMIT && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type))) { prt_printf(err, "invalid key type for btree %s (%s)", - bch2_btree_ids[type], bch2_bkey_types[k.k->type]); + bch2_btree_id_str(type), bch2_bkey_types[k.k->type]); return -BCH_ERR_invalid_bkey; } diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 82cf243aa288..864d1755c058 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -783,12 +783,12 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) "btree node header doesn't match ptr\n" "btree %s level %u\n" "ptr: ", - bch2_btree_ids[b->c.btree_id], b->c.level); + bch2_btree_id_str(b->c.btree_id), b->c.level); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_printf(&buf, "\nheader: btree %s level %llu\n" "min ", - bch2_btree_ids[BTREE_NODE_ID(b->data)], + bch2_btree_id_str(BTREE_NODE_ID(b->data)), BTREE_NODE_LEVEL(b->data)); bch2_bpos_to_text(&buf, b->data->min_key); @@ -1151,8 +1151,21 @@ wait_on_io: 
six_unlock_intent(&b->c.lock); } -void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, - const struct btree *b) +const char *bch2_btree_id_str(enum btree_id btree) +{ + return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)"; +} + +void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) +{ + prt_printf(out, "%s level %u/%u\n ", + bch2_btree_id_str(b->c.btree_id), + b->c.level, + bch2_btree_id_root(c, b->c.btree_id)->level); + bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); +} + +void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { struct bset_stats stats; diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 1e562b6efa62..cfb80b201d61 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -123,8 +123,9 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) return bch2_btree_id_root(c, b->c.btree_id)->b; } -void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, - const struct btree *); +const char *bch2_btree_id_str(enum btree_id); +void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *); +void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *); #endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 693ed067b1a7..8cfd2edd1b08 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -101,7 +101,7 @@ static int bch2_gc_check_topology(struct bch_fs *c, "btree node with incorrect min_key at btree %s level %u:\n" " prev %s\n" " cur %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); @@ -129,7 +129,7 @@ static int 
bch2_gc_check_topology(struct bch_fs *c, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); @@ -290,7 +290,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, "btree node overwritten by next node at btree %s level %u:\n" " node %s\n" " next %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { ret = DROP_PREV_NODE; goto out; @@ -301,7 +301,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, "btree node with incorrect max_key at btree %s level %u:\n" " node %s\n" " next %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) ret = set_node_max(c, prev, bpos_predecessor(cur->data->min_key)); @@ -313,7 +313,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, "btree node overwritten by prev node at btree %s level %u:\n" " prev %s\n" " node %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { ret = DROP_THIS_NODE; goto out; @@ -323,7 +323,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, "btree node with incorrect min_key at btree %s level %u:\n" " prev %s\n" " node %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) ret = set_node_min(c, cur, expected_start); } @@ -347,7 +347,7 @@ static int btree_repair_node_end(struct bch_fs *c, struct btree *b, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", - bch2_btree_ids[b->c.btree_id], b->c.level, + bch2_btree_id_str(b->c.btree_id), b->c.level, buf1.buf, buf2.buf)) { 
ret = set_node_max(c, child, b->key.k.p); if (ret) @@ -398,7 +398,7 @@ again: if (mustfix_fsck_err_on(ret == -EIO, c, "Topology repair: unreadable btree node at btree %s level %u:\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level - 1, buf.buf)) { bch2_btree_node_evict(trans, cur_k.k); @@ -506,7 +506,7 @@ again: if (mustfix_fsck_err_on(!have_child, c, "empty interior btree node at btree %s level %u\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf)) ret = DROP_THIS_NODE; err: @@ -970,7 +970,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b FSCK_NO_RATELIMIT, "Unreadable btree node at btree %s level %u:\n" " %s", - bch2_btree_ids[b->c.btree_id], + bch2_btree_id_str(b->c.btree_id), b->c.level - 1, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) && diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index a869cf6ac7c6..7bf3ee25bc32 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -510,16 +510,6 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) bch2_trans_node_reinit_iter(trans, b); } -static void btree_pos_to_text(struct printbuf *out, struct bch_fs *c, - struct btree *b) -{ - prt_printf(out, "%s level %u/%u\n ", - bch2_btree_ids[b->c.btree_id], - b->c.level, - bch2_btree_id_root(c, b->c.btree_id)->level); - bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); -} - static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, @@ -532,7 +522,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, if (ca) prt_printf(out, "on %s ", ca->name); prt_printf(out, "at btree "); - btree_pos_to_text(out, c, b); + bch2_btree_pos_to_text(out, c, b); prt_printf(out, "\n node offset %u", b->written); if (i) @@ -1177,7 +1167,7 @@ static void btree_node_read_work(struct work_struct *work) } start: 
printbuf_reset(&buf); - btree_pos_to_text(&buf, c, b); + bch2_btree_pos_to_text(&buf, c, b); bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s", bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) @@ -1213,7 +1203,7 @@ start: printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->key.k.p); bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", - __func__, bch2_btree_ids[b->c.btree_id], b->c.level, buf.buf); + __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); bch2_btree_node_rewrite_async(c, b); } @@ -1524,7 +1514,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, struct printbuf buf = PRINTBUF; prt_str(&buf, "btree node read error: no device to read from\n at "); - btree_pos_to_text(&buf, c, b); + bch2_btree_pos_to_text(&buf, c, b); bch_err(c, "%s", buf.buf); if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1d79514754d7..3b629420655a 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -362,7 +362,7 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, bch2_bpos_to_text(&buf, pos); panic("not locked: %s %s%s\n", - bch2_btree_ids[id], buf.buf, + bch2_btree_id_str(id), buf.buf, key_cache ? 
" cached" : ""); } @@ -1371,7 +1371,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) struct bkey_s_c old = { &i->old_k, i->old_v }; prt_printf(buf, "update: btree=%s cached=%u %pS", - bch2_btree_ids[i->btree_id], + bch2_btree_id_str(i->btree_id), i->cached, (void *) i->ip_allocated); prt_newline(buf); @@ -1387,7 +1387,7 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) trans_for_each_wb_update(trans, wb) { prt_printf(buf, "update: btree=%s wb=1 %pS", - bch2_btree_ids[wb->btree], + bch2_btree_id_str(wb->btree), (void *) i->ip_allocated); prt_newline(buf); @@ -1416,7 +1416,7 @@ void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path) path->idx, path->ref, path->intent_ref, path->preserve ? 'P' : ' ', path->should_be_locked ? 'S' : ' ', - bch2_btree_ids[path->btree_id], + bch2_btree_id_str(path->btree_id), path->level); bch2_bpos_to_text(out, path->pos); @@ -3025,7 +3025,7 @@ leaked: trans_for_each_path(trans, path) if (path->ref) printk(KERN_ERR " btree %s %pS\n", - bch2_btree_ids[path->btree_id], + bch2_btree_id_str(path->btree_id), (void *) path->ip_allocated); /* Be noisy about this: */ bch2_fatal_error(c); @@ -3100,7 +3100,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, prt_tab(out); prt_printf(out, "%px %c l=%u %s:", b, b->cached ? 'c' : 'b', - b->level, bch2_btree_ids[b->btree_id]); + b->level, bch2_btree_id_str(b->btree_id)); bch2_bpos_to_text(out, btree_node_pos(b)); prt_tab(out); @@ -3130,7 +3130,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) path->idx, path->cached ? 
'c' : 'b', path->level, - bch2_btree_ids[path->btree_id]); + bch2_btree_id_str(path->btree_id)); bch2_bpos_to_text(out, path->pos); prt_newline(out); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 29a0b566a4fe..c781ce6f389c 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -324,7 +324,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path) ck = bkey_cached_reuse(bc); if (unlikely(!ck)) { bch_err(c, "error allocating memory for key cache item, btree %s", - bch2_btree_ids[path->btree_id]); + bch2_btree_id_str(path->btree_id)); return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create); } @@ -407,7 +407,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL); if (!new_k) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", - bch2_btree_ids[ck->key.btree_id], new_u64s); + bch2_btree_id_str(ck->key.btree_id), new_u64s); ret = -BCH_ERR_ENOMEM_btree_key_cache_fill; goto err; } diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 04c1f4610972..5fb0903e5bd2 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -349,7 +349,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); if (!new_k) { bch_err(c, "error allocating memory for key cache key, btree %s u64s %u", - bch2_btree_ids[path->btree_id], new_u64s); + bch2_btree_id_str(path->btree_id), new_u64s); return -BCH_ERR_ENOMEM_btree_key_cache_insert; } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 75a3dc7cbd47..57c5128db173 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -517,7 +517,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * prt_printf(out, "%px btree=%s l=%u ", b, - bch2_btree_ids[b->c.btree_id], + 
bch2_btree_id_str(b->c.btree_id), b->c.level); prt_newline(out); @@ -919,18 +919,18 @@ void bch2_fs_debug_init(struct bch_fs *c) bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); bd++) { bd->id = bd - c->btree_debug; - debugfs_create_file(bch2_btree_ids[bd->id], + debugfs_create_file(bch2_btree_id_str(bd->id), 0400, c->btree_debug_dir, bd, &btree_debug_ops); snprintf(name, sizeof(name), "%s-formats", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); debugfs_create_file(name, 0400, c->btree_debug_dir, bd, &btree_format_debug_ops); snprintf(name, sizeof(name), "%s-bfloat-failed", - bch2_btree_ids[bd->id]); + bch2_btree_id_str(bd->id)); debugfs_create_file(name, 0400, c->btree_debug_dir, bd, &bfloat_failed_debug_ops); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 8d79fc8c690b..f26b824e70a8 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "bkey_buf.h" +#include "btree_cache.h" #include "btree_update.h" #include "buckets.h" #include "darray.h" @@ -444,7 +445,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, if (i->equiv == n.equiv) { bch_err(c, "snapshot deletion did not finish:\n" " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", - bch2_btree_ids[btree_id], + bch2_btree_id_str(btree_id), pos.inode, pos.offset, i->id, n.id, n.equiv); set_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags); @@ -809,7 +810,7 @@ out: return ret; bad_hash: if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", - bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash, + bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 6a3d6a374e9c..b29ece313e44 100644 --- 
a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -369,7 +369,7 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs prt_newline(out); prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]); } - prt_printf(out, "btree=%s l=%u ", bch2_btree_ids[entry->btree_id], entry->level); + prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); first = false; } diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 39a14e321680..82f60c7883ba 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -1110,7 +1110,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str prt_printf(out, " data type %s btree_id %s position: ", bch2_data_types[stats->data_type], - bch2_btree_ids[stats->btree_id]); + bch2_btree_id_str(stats->btree_id)); bch2_bpos_to_text(out, stats->pos); prt_newline(out); printbuf_indent_add(out, 2); diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 232f50c73a94..8294f56e45d5 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -42,9 +42,8 @@ const char * const bch2_sb_compat[] = { NULL }; -const char * const bch2_btree_ids[] = { +const char * const __bch2_btree_ids[] = { BCH_BTREE_IDS() - "interior btree node", NULL }; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 55014336c5f7..16dd0f0622bc 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -16,7 +16,7 @@ extern const char * const bch2_fsck_fix_opts[]; extern const char * const bch2_version_upgrade_opts[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; -extern const char * const bch2_btree_ids[]; +extern const char * const __bch2_btree_ids[]; extern const char * const bch2_csum_types[]; extern const char * const bch2_csum_opts[]; extern const char * const bch2_compression_types[]; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1ad12ae38053..55663253c9d3 
100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -182,7 +182,7 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_journal_replay_key(trans, k)); if (ret) { bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", - bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret)); + bch2_btree_id_str(k->btree_id), k->level, bch2_err_str(ret)); goto err; } } @@ -367,7 +367,7 @@ static int read_btree_roots(struct bch_fs *c) __fsck_err(c, btree_id_is_alloc(i) ? FSCK_CAN_IGNORE : 0, "invalid btree root %s", - bch2_btree_ids[i]); + bch2_btree_id_str(i)); if (i == BTREE_ID_alloc) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); } @@ -376,7 +376,7 @@ static int read_btree_roots(struct bch_fs *c) if (ret) { fsck_err(c, "error reading btree root %s", - bch2_btree_ids[i]); + bch2_btree_id_str(i)); if (btree_id_is_alloc(i)) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); ret = 0; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index eb764b9a4629..5b079369af95 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -341,7 +341,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) { - prt_printf(out, "%s: ", bch2_btree_ids[c->gc_gens_btree]); + prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree)); bch2_bpos_to_text(out, c->gc_gens_pos); prt_printf(out, "\n"); } diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 19264492151b..2308f49f3b2e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(btree_node, TP_printk("%d,%d %u %s %llu:%llu:%u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->level, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) ); @@ -461,7 +461,7 @@ TRACE_EVENT(btree_path_relock_fail, TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u node %s 
held %u:%u lock count %u:%u iter seq %u lock seq %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -522,7 +522,7 @@ TRACE_EVENT(btree_path_upgrade_fail, TP_printk("%s %pS btree %s pos %llu:%llu:%u level %u locked %u held %u:%u lock count %u:%u iter seq %u lock seq %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -1012,7 +1012,7 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, TP_printk("%s %pS btree %s pos %llu:%llu:%u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot) @@ -1061,7 +1061,7 @@ TRACE_EVENT(trans_restart_upgrade, TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, @@ -1219,7 +1219,7 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, TP_printk("%s %pS btree %s pos %llu:%llu:%u old_u64s %u new_u64s %u", __entry->trans_fn, (void *) __entry->caller_ip, - bch2_btree_ids[__entry->btree_id], + bch2_btree_id_str(__entry->btree_id), __entry->pos_inode, __entry->pos_offset, __entry->pos_snapshot, -- cgit v1.2.3 From 50a38ca1baace3dc66027ad41393917b05318b14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Oct 2023 00:01:53 -0400 Subject: bcachefs: Fix btree_node_type enum More forwards compatibility fixups: having BKEY_TYPE_btree at the end of the enum conflicts with unknown btree IDs, this shifts BKEY_TYPE_btree to slot 0 and fixes things up accordingly.
Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey_methods.c | 22 ++++++++++++++++------ fs/bcachefs/btree_iter.h | 2 +- fs/bcachefs/btree_trans_commit.c | 11 +++++------ fs/bcachefs/btree_types.h | 35 +++++++++++++++++++---------------- 4 files changed, 41 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index d9711a27a71e..baf491878bf1 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -143,15 +143,20 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, } static u64 bch2_key_types_allowed[] = { -#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys, - BCH_BTREE_IDS() -#undef x [BKEY_TYPE_btree] = BIT_ULL(KEY_TYPE_deleted)| BIT_ULL(KEY_TYPE_btree_ptr)| BIT_ULL(KEY_TYPE_btree_ptr_v2), +#define x(name, nr, flags, keys) [BKEY_TYPE_##name] = BIT_ULL(KEY_TYPE_deleted)|keys, + BCH_BTREE_IDS() +#undef x }; +const char *bch2_btree_node_type_str(enum btree_node_type type) +{ + return type == BKEY_TYPE_btree ? 
"internal btree node" : bch2_btree_id_str(type - 1); +} + int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, enum bkey_invalid_flags flags, @@ -162,10 +167,13 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, return -BCH_ERR_invalid_bkey; } + if (type >= BKEY_TYPE_NR) + return 0; + if (flags & BKEY_INVALID_COMMIT && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type))) { prt_printf(err, "invalid key type for btree %s (%s)", - bch2_btree_id_str(type), bch2_bkey_types[k.k->type]); + bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]); return -BCH_ERR_invalid_bkey; } @@ -188,13 +196,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, } if (type != BKEY_TYPE_btree) { - if (!btree_type_has_snapshots((enum btree_id) type) && + enum btree_id btree = type - 1; + + if (!btree_type_has_snapshots(btree) && k.k->p.snapshot) { prt_printf(err, "nonzero snapshot"); return -BCH_ERR_invalid_bkey; } - if (btree_type_has_snapshots((enum btree_id) type) && + if (btree_type_has_snapshots(btree) && !k.k->p.snapshot) { prt_printf(err, "snapshot == 0"); return -BCH_ERR_invalid_bkey; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index fbe273453db3..70759ee3e5c7 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -411,7 +411,7 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS; if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && - btree_node_type_is_extents(btree_id)) + btree_id_is_extents(btree_id)) flags |= BTREE_ITER_IS_EXTENTS; if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) && diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 5fb0903e5bd2..1000b456d232 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -379,7 +379,7 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (unlikely(flags & 
BTREE_TRIGGER_NORUN)) return 0; - if (!btree_node_type_needs_gc((enum btree_node_type) i->btree_id)) + if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) return 0; if (old_ops->atomic_trigger == new_ops->atomic_trigger && @@ -776,12 +776,12 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, wb->btree, 0, wb->k.k.p); } -static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, unsigned flags, +static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, + enum bkey_invalid_flags flags, struct btree_insert_entry *i, struct printbuf *err) { struct bch_fs *c = trans->c; - int rw = (flags & BTREE_INSERT_JOURNAL_REPLAY) ? READ : WRITE; printbuf_reset(err); prt_printf(err, "invalid bkey on insert from %s -> %ps", @@ -792,8 +792,7 @@ static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, un bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); prt_newline(err); - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, rw, err); + bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); bch2_print_string_as_lines(KERN_ERR, err->buf); bch2_inconsistent_error(c); @@ -1034,7 +1033,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, flags, i, &buf); + ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); btree_insert_entry_checks(trans, i); printbuf_exit(&buf); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index c9a38e254949..a039ce4a4809 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -636,16 +636,17 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i) } enum btree_node_type { -#define x(kwd, val, ...) BKEY_TYPE_##kwd = val, + BKEY_TYPE_btree, +#define x(kwd, val, ...) 
BKEY_TYPE_##kwd = val + 1, BCH_BTREE_IDS() #undef x - BKEY_TYPE_btree, + BKEY_TYPE_NR }; /* Type of a key in btree @id at level @level: */ static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) { - return level ? BKEY_TYPE_btree : (enum btree_node_type) id; + return level ? BKEY_TYPE_btree : (unsigned) id + 1; } /* Type of keys @b contains: */ @@ -654,19 +655,21 @@ static inline enum btree_node_type btree_node_type(struct btree *b) return __btree_node_type(b->c.level, b->c.btree_id); } +const char *bch2_btree_node_type_str(enum btree_node_type); + #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ - (BIT(BKEY_TYPE_extents)| \ - BIT(BKEY_TYPE_alloc)| \ - BIT(BKEY_TYPE_inodes)| \ - BIT(BKEY_TYPE_stripes)| \ - BIT(BKEY_TYPE_reflink)| \ - BIT(BKEY_TYPE_btree)) + (BIT_ULL(BKEY_TYPE_extents)| \ + BIT_ULL(BKEY_TYPE_alloc)| \ + BIT_ULL(BKEY_TYPE_inodes)| \ + BIT_ULL(BKEY_TYPE_stripes)| \ + BIT_ULL(BKEY_TYPE_reflink)| \ + BIT_ULL(BKEY_TYPE_btree)) #define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ - (BIT(BKEY_TYPE_alloc)| \ - BIT(BKEY_TYPE_inodes)| \ - BIT(BKEY_TYPE_stripes)| \ - BIT(BKEY_TYPE_snapshots)) + (BIT_ULL(BKEY_TYPE_alloc)| \ + BIT_ULL(BKEY_TYPE_inodes)| \ + BIT_ULL(BKEY_TYPE_stripes)| \ + BIT_ULL(BKEY_TYPE_snapshots)) #define BTREE_NODE_TYPE_HAS_TRIGGERS \ (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ @@ -674,13 +677,13 @@ static inline enum btree_node_type btree_node_type(struct btree *b) static inline bool btree_node_type_needs_gc(enum btree_node_type type) { - return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); + return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type); } static inline bool btree_node_type_is_extents(enum btree_node_type type) { const unsigned mask = 0 -#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << nr) +#define x(name, nr, flags, ...) 
|((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1)) BCH_BTREE_IDS() #undef x ; @@ -690,7 +693,7 @@ static inline bool btree_node_type_is_extents(enum btree_node_type type) static inline bool btree_id_is_extents(enum btree_id btree) { - return btree_node_type_is_extents((enum btree_node_type) btree); + return btree_node_type_is_extents(__btree_node_type(0, btree)); } static inline bool btree_type_has_snapshots(enum btree_id id) -- cgit v1.2.3 From a1d97d8417d3c2f2477847541303621c32652976 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Oct 2023 12:02:14 -0400 Subject: bcachefs: Fix shrinker names Shrinkers are now exported to debugfs, so the names can't have slashes in them. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 2 +- fs/bcachefs/btree_key_cache.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 864d1755c058..0b084fbc478a 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -476,7 +476,7 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bc->shrink.count_objects = bch2_btree_cache_count; bc->shrink.scan_objects = bch2_btree_cache_scan; bc->shrink.seeks = 4; - ret = register_shrinker(&bc->shrink, "%s/btree_cache", c->name); + ret = register_shrinker(&bc->shrink, "%s-btree_cache", c->name); if (ret) goto err; diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index c781ce6f389c..634ffdcb55f9 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -1042,7 +1042,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc) bc->shrink.seeks = 0; bc->shrink.count_objects = bch2_btree_key_cache_count; bc->shrink.scan_objects = bch2_btree_key_cache_scan; - if (register_shrinker(&bc->shrink, "%s/btree_key_cache", c->name)) + if (register_shrinker(&bc->shrink, "%s-btree_key_cache", c->name)) return -BCH_ERR_ENOMEM_fs_btree_cache_init; return 0; } -- cgit v1.2.3 From 
253ba178c8d9065748fa56b39343e6a5a55b0023 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Oct 2023 12:24:36 -0400 Subject: bcachefs: Fix ca->oldest_gen allocation The ca->oldest_gen array needs to be the same size as the bucket_gens array; ca->mi.nbuckets is updated with only state_lock held, not gc_lock, so bch2_gc_gens() could race with device resize and allocate too small of an oldest_gens array. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 8cfd2edd1b08..53d1d1da2640 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1954,19 +1954,17 @@ int bch2_gc_gens(struct bch_fs *c) trans = bch2_trans_get(c); for_each_member_device(ca, c, i) { - struct bucket_gens *gens; + struct bucket_gens *gens = bucket_gens(ca); BUG_ON(ca->oldest_gen); - ca->oldest_gen = kvmalloc(ca->mi.nbuckets, GFP_KERNEL); + ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL); if (!ca->oldest_gen) { percpu_ref_put(&ca->ref); ret = -BCH_ERR_ENOMEM_gc_gens; goto err; } - gens = bucket_gens(ca); - for (b = gens->first_bucket; b < gens->nbuckets; b++) ca->oldest_gen[b] = gens->b[b]; -- cgit v1.2.3 From e38356d65ed085a2c0ba056fa9048ad8845da1d0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Oct 2023 14:40:23 -0400 Subject: bcachefs: Kill dead code extent_save() Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 879e7d218b6a..acf78f55bdff 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -737,22 +737,4 @@ static inline void bch2_key_resize(struct bkey *k, unsigned new_size) k->size = new_size; } -/* - * In extent_sort_fix_overlapping(), insert_fixup_extent(), - * extent_merge_inline() - we're modifying keys in place that are packed. 
To do - * that we have to unpack the key, modify the unpacked key - then this - * copies/repacks the unpacked to the original as necessary. - */ -static inline void extent_save(struct btree *b, struct bkey_packed *dst, - struct bkey *src) -{ - struct bkey_format *f = &b->format; - struct bkey_i *dst_unpacked; - - if ((dst_unpacked = packed_to_bkey(dst))) - dst_unpacked->k = *src; - else - BUG_ON(!bch2_bkey_pack_key(dst, src, f)); -} - #endif /* _BCACHEFS_EXTENTS_H */ -- cgit v1.2.3 From d0261559c434abbd7254c9c97c68f5e024daabf4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Oct 2023 13:19:54 -0400 Subject: bcachefs: Delete duplicate time stats initialization This code duplicated initialization already done in bch2_fs_btree_iter_init(). Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0e85c22672be..646f67a589a4 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -960,12 +960,6 @@ int bch2_fs_start(struct bch_fs *c) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); - for (i = 0; i < BCH_TRANSACTIONS_NR; i++) { - mutex_lock(&c->btree_transaction_stats[i].lock); - bch2_time_stats_init(&c->btree_transaction_stats[i].lock_hold_times); - mutex_unlock(&c->btree_transaction_stats[i].lock); - } - ret = BCH_SB_INITIALIZED(c->disk_sb.sb) ? bch2_fs_recovery(c) : bch2_fs_initialize(c); -- cgit v1.2.3 From bbe682c76789d679cb75effd7792d41b09efea00 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Oct 2023 13:54:39 -0400 Subject: bcachefs: Ensure devices are always correctly initialized We can't mark device superblocks or allocate journal on a device that isn't online. That means we may need to do this on every mount, because we may have formatted a new filesystem and then done the first mount (bch2_fs_initialize()) in degraded mode. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 32 ++++++++++++++++++++++++-------- fs/bcachefs/buckets.h | 1 + fs/bcachefs/journal.c | 19 +++++++++++++++++++ fs/bcachefs/journal.h | 1 + fs/bcachefs/recovery.c | 24 +++++++++--------------- fs/bcachefs/recovery_types.h | 2 ++ fs/bcachefs/super.c | 30 +++++++++++++++++------------- 7 files changed, 73 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index a1a4b5feadaa..0c5b7b3cb24c 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1825,16 +1825,16 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, bch2_data_types[type], bch2_data_types[type]); ret = -EIO; - goto out; + goto err; } - a->v.data_type = type; - a->v.dirty_sectors = sectors; - - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - if (ret) - goto out; -out: + if (a->v.data_type != type || + a->v.dirty_sectors != sectors) { + a->v.data_type = type; + a->v.dirty_sectors = sectors; + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + } +err: bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1929,6 +1929,22 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca) return ret; } +int bch2_trans_mark_dev_sbs(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + for_each_online_member(ca, c, i) { + int ret = bch2_trans_mark_dev_sb(c, ca); + if (ret) { + percpu_ref_put(&ca->ref); + return ret; + } + } + + return 0; +} + /* Disk reservations: */ #define SECTORS_CACHE 1024 diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index bf8d7f407e9c..9767ed035bee 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -345,6 +345,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list * int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, size_t, enum bch_data_type, unsigned); int bch2_trans_mark_dev_sb(struct bch_fs *, struct bch_dev *); +int bch2_trans_mark_dev_sbs(struct 
bch_fs *); static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b) { diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 0e7a9ffa3671..5b5d69f2316b 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1019,6 +1019,25 @@ err: return ret; } +int bch2_fs_journal_alloc(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + for_each_online_member(ca, c, i) { + if (ca->journal.nr) + continue; + + int ret = bch2_dev_journal_alloc(ca); + if (ret) { + percpu_ref_put(&ca->io_ref); + return ret; + } + } + + return 0; +} + /* startup/shutdown: */ static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 491133cc52f3..011711e99c8d 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -534,6 +534,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); int bch2_dev_journal_alloc(struct bch_dev *); +int bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 55663253c9d3..63faf70434ff 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -946,16 +946,12 @@ int bch2_fs_initialize(struct bch_fs *c) for (i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); - for_each_online_member(ca, c, i) + for_each_member_device(ca, c, i) bch2_dev_usage_init(ca); - for_each_online_member(ca, c, i) { - ret = bch2_dev_journal_alloc(ca); - if (ret) { - percpu_ref_put(&ca->io_ref); - goto err; - } - } + ret = bch2_fs_journal_alloc(c); + if (ret) + goto err; /* * journal_res_get() will crash if called before this has @@ -973,15 +969,13 @@ int bch2_fs_initialize(struct bch_fs *c) * btree updates */ bch_verbose(c, "marking superblocks"); - for_each_member_device(ca, c, i) { - ret = bch2_trans_mark_dev_sb(c, ca); - if (ret) { 
- percpu_ref_put(&ca->ref); - goto err; - } + ret = bch2_trans_mark_dev_sbs(c); + bch_err_msg(c, ret, "marking superblocks"); + if (ret) + goto err; + for_each_online_member(ca, c, i) ca->new_fs_bucket_idx = 0; - } ret = bch2_fs_freespace_init(c); if (ret) diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index 4c1cea2a601d..bf43e13c4560 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -14,6 +14,8 @@ x(snapshots_read, PASS_ALWAYS) \ x(check_topology, 0) \ x(check_allocations, PASS_FSCK) \ + x(trans_mark_dev_sbs, PASS_ALWAYS|PASS_SILENT) \ + x(fs_journal_alloc, PASS_ALWAYS|PASS_SILENT) \ x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ x(journal_replay, PASS_ALWAYS) \ x(check_alloc_info, PASS_FSCK) \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 646f67a589a4..9d59d6246ed6 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -948,9 +948,6 @@ int bch2_fs_start(struct bch_fs *c) goto err; } - for_each_online_member(ca, c, i) - bch2_sb_from_fs(c, ca); - for_each_online_member(ca, c, i) bch2_members_v2_get_mut(c->disk_sb.sb, i)->last_mount = cpu_to_le64(now); @@ -1683,13 +1680,13 @@ have_slot: ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err_msg(c, ret, "marking new superblock"); + bch_err_msg(ca, ret, "marking new superblock"); goto err_late; } ret = bch2_fs_freespace_init(c); if (ret) { - bch_err_msg(c, ret, "initializing free space"); + bch_err_msg(ca, ret, "initializing free space"); goto err_late; } @@ -1757,19 +1754,26 @@ int bch2_dev_online(struct bch_fs *c, const char *path) if (ca->mi.state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); - mutex_lock(&c->sb_lock); - struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + if (!ca->mi.freespace_initialized) { + ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); + bch_err_msg(ca, ret, "initializing free space"); + if (ret) + goto err; + } - m->last_mount = - cpu_to_le64(ktime_get_real_seconds()); + 
if (!ca->journal.nr) { + ret = bch2_dev_journal_alloc(ca); + bch_err_msg(ca, ret, "allocating journal"); + if (ret) + goto err; + } + mutex_lock(&c->sb_lock); + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = + cpu_to_le64(ktime_get_real_seconds()); bch2_write_super(c); mutex_unlock(&c->sb_lock); - ret = bch2_fs_freespace_init(c); - if (ret) - bch_err_msg(c, ret, "initializing free space"); - up_write(&c->state_lock); return 0; err: -- cgit v1.2.3 From 8480905765c3729025331720d23735ce085ef070 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Oct 2023 15:03:05 -0400 Subject: bcachefs: Improve io option handling in data move path The data move path now correctly picks IO options when inodes in different snapshots have different options applied. Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 131 +++++++++++++++++++++++++++++++++-------------------- fs/bcachefs/move.h | 26 +++++++++++ 2 files changed, 107 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 82f60c7883ba..38b076ff1906 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -20,6 +20,7 @@ #include "keylist.h" #include "move.h" #include "replicas.h" +#include "snapshot.h" #include "super-io.h" #include "trace.h" @@ -413,35 +414,87 @@ err: return ret; } -static int lookup_inode(struct btree_trans *trans, struct bpos pos, - struct bch_inode_unpacked *inode) +struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, + struct per_snapshot_io_opts *io_opts, + struct bkey_s_c extent_k) +{ + struct bch_fs *c = trans->c; + u32 restart_count = trans->restart_count; + int ret = 0; + + if (io_opts->cur_inum != extent_k.k->p.inode) { + struct btree_iter iter; + struct bkey_s_c k; + + io_opts->d.nr = 0; + + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + if (k.k->p.offset != extent_k.k->p.inode) + break; + + if (!bkey_is_inode(k.k)) + 
continue; + + struct bch_inode_unpacked inode; + BUG_ON(bch2_inode_unpack(k, &inode)); + + struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; + bch2_inode_opts_get(&e.io_opts, trans->c, &inode); + + ret = darray_push(&io_opts->d, e); + if (ret) + break; + } + bch2_trans_iter_exit(trans, &iter); + io_opts->cur_inum = extent_k.k->p.inode; + } + + ret = ret ?: trans_was_restarted(trans, restart_count); + if (ret) + return ERR_PTR(ret); + + if (extent_k.k->p.snapshot) { + struct snapshot_io_opts_entry *i; + darray_for_each(io_opts->d, i) + if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) + return &i->io_opts; + } + + return &io_opts->fs_io_opts; +} + +static int bch2_move_get_io_opts_one(struct btree_trans *trans, + struct bch_io_opts *io_opts, + struct bkey_s_c extent_k) { struct btree_iter iter; struct bkey_s_c k; int ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos, - BTREE_ITER_ALL_SNAPSHOTS); - k = bch2_btree_iter_peek(&iter); + /* reflink btree? */ + if (!extent_k.k->p.inode) { + *io_opts = bch2_opts_to_inode_opts(trans->c->opts); + return 0; + } + + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, + SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot), + BTREE_ITER_CACHED); ret = bkey_err(k); - if (ret) - goto err; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; - if (!k.k || !bkey_eq(k.k->p, pos)) { - ret = -BCH_ERR_ENOENT_inode; - goto err; + if (!ret && bkey_is_inode(k.k)) { + struct bch_inode_unpacked inode; + bch2_inode_unpack(k, &inode); + bch2_inode_opts_get(io_opts, trans->c, &inode); + } else { + *io_opts = bch2_opts_to_inode_opts(trans->c->opts); } - ret = bkey_is_inode(k.k) ? 
0 : -EIO; - if (ret) - goto err; - - ret = bch2_inode_unpack(k, inode); - if (ret) - goto err; -err: bch2_trans_iter_exit(trans, &iter); - return ret; + return 0; } static int move_ratelimit(struct btree_trans *trans, @@ -492,30 +545,6 @@ static int move_ratelimit(struct btree_trans *trans, return 0; } -static int move_get_io_opts(struct btree_trans *trans, - struct bch_io_opts *io_opts, - struct bkey_s_c k, u64 *cur_inum) -{ - struct bch_inode_unpacked inode; - int ret; - - if (*cur_inum == k.k->p.inode) - return 0; - - ret = lookup_inode(trans, - SPOS(0, k.k->p.inode, k.k->p.snapshot), - &inode); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - return ret; - - if (!ret) - bch2_inode_opts_get(io_opts, trans->c, &inode); - else - *io_opts = bch2_opts_to_inode_opts(trans->c->opts); - *cur_inum = k.k->p.inode; - return 0; -} - static int __bch2_move_data(struct moving_context *ctxt, struct bpos start, struct bpos end, @@ -523,15 +552,16 @@ static int __bch2_move_data(struct moving_context *ctxt, enum btree_id btree_id) { struct bch_fs *c = ctxt->c; - struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); + struct per_snapshot_io_opts snapshot_io_opts; + struct bch_io_opts *io_opts; struct bkey_buf sk; struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct data_update_opts data_opts; - u64 cur_inum = U64_MAX; int ret = 0, ret2; + per_snapshot_io_opts_init(&snapshot_io_opts, c); bch2_bkey_buf_init(&sk); if (ctxt->stats) { @@ -569,12 +599,13 @@ static int __bch2_move_data(struct moving_context *ctxt, if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; - ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); + io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k); + ret = PTR_ERR_OR_ZERO(io_opts); if (ret) continue; memset(&data_opts, 0, sizeof(data_opts)); - if (!pred(c, arg, k, &io_opts, &data_opts)) + if (!pred(c, arg, k, io_opts, &data_opts)) goto next; /* @@ -585,7 +616,7 @@ static int 
__bch2_move_data(struct moving_context *ctxt, k = bkey_i_to_s_c(sk.k); ret2 = bch2_move_extent(trans, &iter, ctxt, NULL, - io_opts, btree_id, k, data_opts); + *io_opts, btree_id, k, data_opts); if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; @@ -612,6 +643,7 @@ next_nondata: bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); + per_snapshot_io_opts_exit(&snapshot_io_opts); return ret; } @@ -673,7 +705,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, struct data_update_opts data_opts; unsigned dirty_sectors, bucket_size; u64 fragmentation; - u64 cur_inum = U64_MAX; struct bpos bp_pos = POS_MIN; int ret = 0; @@ -737,7 +768,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); + ret = bch2_move_get_io_opts_one(trans, &io_opts, k); if (ret) { bch2_trans_iter_exit(trans, &iter); continue; diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index cbdd58db8782..aa4b65c4f960 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -62,6 +62,32 @@ struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *); void bch2_moving_ctxt_do_pending_writes(struct moving_context *, struct btree_trans *); +/* Inodes in different snapshots may have different IO options: */ +struct snapshot_io_opts_entry { + u32 snapshot; + struct bch_io_opts io_opts; +}; + +struct per_snapshot_io_opts { + u64 cur_inum; + struct bch_io_opts fs_io_opts; + DARRAY(struct snapshot_io_opts_entry) d; +}; + +static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c) +{ + memset(io_opts, 0, sizeof(*io_opts)); + io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts); +} + +static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts) +{ + darray_exit(&io_opts->d); +} + +struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, 
+ struct per_snapshot_io_opts *, struct bkey_s_c); + int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); int bch2_move_data(struct bch_fs *, -- cgit v1.2.3 From 523f33efbf406f2eb0f071123d17fbbd9e40d692 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Jun 2023 20:18:12 -0400 Subject: bcachefs: All triggers are BTREE_TRIGGER_WANTS_OLD_AND_NEW Upcoming rebalance_work btree will require extent triggers to be BTREE_TRIGGER_WANTS_OLD_AND_NEW - so to reduce potential confusion, let's just make all triggers BTREE_TRIGGER_WANTS_OLD_AND_NEW. Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey_methods.h | 10 ---- fs/bcachefs/btree_trans_commit.c | 6 +- fs/bcachefs/buckets.c | 123 ++++++++++++++++++++++++--------------- fs/bcachefs/buckets.h | 14 +++++ fs/bcachefs/reflink.c | 34 +++++------ 5 files changed, 105 insertions(+), 82 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 668f595e2fcf..c829c8e381a7 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -119,16 +119,6 @@ enum btree_update_flags { #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) #define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) -#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \ - ((1U << KEY_TYPE_alloc)| \ - (1U << KEY_TYPE_alloc_v2)| \ - (1U << KEY_TYPE_alloc_v3)| \ - (1U << KEY_TYPE_alloc_v4)| \ - (1U << KEY_TYPE_stripe)| \ - (1U << KEY_TYPE_inode)| \ - (1U << KEY_TYPE_inode_v2)| \ - (1U << KEY_TYPE_snapshot)) - static inline int bch2_trans_mark_key(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 1000b456d232..53ddcaf042a2 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -382,8 +382,7 @@ static int run_one_mem_trigger(struct btree_trans *trans, if 
(!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) return 0; - if (old_ops->atomic_trigger == new_ops->atomic_trigger && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + if (old_ops->atomic_trigger == new_ops->atomic_trigger) { ret = bch2_mark_key(trans, i->btree_id, i->level, old, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); @@ -425,8 +424,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ if (!i->insert_trigger_run && !i->overwrite_trigger_run && - old_ops->trans_trigger == new_ops->trans_trigger && - ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { + old_ops->trans_trigger == new_ops->trans_trigger) { i->overwrite_trigger_run = true; i->insert_trigger_run = true; return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 0c5b7b3cb24c..5c1eca183243 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -935,14 +935,12 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans, return 0; } -int bch2_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? 
old : new; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1018,6 +1016,14 @@ int bch2_mark_extent(struct btree_trans *trans, return 0; } +int bch2_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); +} + int bch2_mark_stripe(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_s_c new, @@ -1124,13 +1130,11 @@ int bch2_mark_stripe(struct btree_trans *trans, return 0; } -int bch2_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? 
old : new; struct bch_fs_usage *fs_usage; unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; @@ -1157,6 +1161,14 @@ int bch2_mark_reservation(struct btree_trans *trans, return 0; } +int bch2_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); +} + static s64 __bch2_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 start, u64 end, @@ -1211,13 +1223,11 @@ fsck_err: return ret; } -int bch2_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +static int __mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? 
old : new; struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; @@ -1251,6 +1261,14 @@ int bch2_mark_reflink_p(struct btree_trans *trans, return ret; } +int bch2_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); +} + void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -1452,15 +1470,11 @@ err: return ret; } -int bch2_trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) +static int __trans_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? old - : bkey_i_to_s_c(new); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1517,6 +1531,14 @@ int bch2_trans_mark_extent(struct btree_trans *trans, return ret; } +int bch2_trans_mark_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_i *new, + unsigned flags) +{ + return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags); +} + static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, struct bkey_s_c_stripe s, unsigned idx, bool deleting) @@ -1670,15 +1692,10 @@ int bch2_trans_mark_stripe(struct btree_trans *trans, return ret; } -int bch2_trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +static int __trans_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c 
k, unsigned flags) { - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? old - : bkey_i_to_s_c(new); unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; struct replicas_delta_list *d; @@ -1700,7 +1717,16 @@ int bch2_trans_mark_reservation(struct btree_trans *trans, return 0; } -static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, +int bch2_trans_mark_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) +{ + return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags); +} + +static int trans_mark_reflink_p_segment(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 *idx, unsigned flags) { @@ -1767,35 +1793,38 @@ err: return ret; } -int bch2_trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +static int __trans_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? 
old - : bkey_i_to_s_c(new); struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); u64 idx, end_idx; int ret = 0; - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = (struct bch_reflink_p *) p.v; - - v->front_pad = v->back_pad = 0; - } - idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); end_idx = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); while (idx < end_idx && !ret) - ret = __bch2_trans_mark_reflink_p(trans, p, &idx, flags); - + ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); return ret; } +int bch2_trans_mark_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) +{ + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; + + v->front_pad = v->back_pad = 0; + } + + return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); +} + static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, size_t b, enum bch_data_type type, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 9767ed035bee..21f6cb356921 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -339,6 +339,20 @@ int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); +#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ +({ \ + int ret = 0; \ + \ + if (_old.k->type) \ + ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \ + if (!ret && _new.k->type) \ + ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \ + ret; \ +}) + +#define 
trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ + mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags) + void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index d77d0ea9afff..540c78cd4b0c 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -103,21 +103,22 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r } #endif +static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags) +{ + if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) { + new->k.type = KEY_TYPE_deleted; + new->k.size = 0; + set_bkey_val_u64s(&new->k, 0);; + *flags &= ~BTREE_TRIGGER_INSERT; + } +} + int bch2_trans_mark_reflink_v(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - if (!(flags & BTREE_TRIGGER_OVERWRITE)) { - struct bkey_i_reflink_v *r = bkey_i_to_reflink_v(new); - - if (!r->v.refcount) { - r->k.type = KEY_TYPE_deleted; - r->k.size = 0; - set_bkey_val_u64s(&r->k, 0); - return 0; - } - } + check_indirect_extent_deleting(new, &flags); return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); } @@ -132,7 +133,7 @@ int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, } void bch2_indirect_inline_data_to_text(struct printbuf *out, - struct bch_fs *c, struct bkey_s_c k) + struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k); unsigned datalen = bkey_inline_data_bytes(k.k); @@ -147,16 +148,7 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { - if (!(flags & BTREE_TRIGGER_OVERWRITE)) { - struct 
bkey_i_indirect_inline_data *r = - bkey_i_to_indirect_inline_data(new); - - if (!r->v.refcount) { - r->k.type = KEY_TYPE_deleted; - r->k.size = 0; - set_bkey_val_u64s(&r->k, 0); - } - } + check_indirect_extent_deleting(new, &flags); return 0; } -- cgit v1.2.3 From bf0d9e89de2e62fe9967ebb77b68d58d3812e4db Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Oct 2023 10:54:24 -0400 Subject: bcachefs: Split apart bch2_target_to_text(), bch2_target_to_text_sb() Previously we just had bch2_opt_target_to_text() which could be passed either a filesystem object or just a superblock - depending on if we have a running filesystem or not. Split these into two functions for clarity. Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_groups.c | 95 +++++++++++++++++++++++++++++------------------ fs/bcachefs/disk_groups.h | 1 + 2 files changed, 59 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index e00133b6ea51..67a04fbbbbee 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -493,10 +493,7 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, return -EINVAL; } -void bch2_opt_target_to_text(struct printbuf *out, - struct bch_fs *c, - struct bch_sb *sb, - u64 v) +void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) { struct target t = target_decode(v); @@ -504,47 +501,71 @@ void bch2_opt_target_to_text(struct printbuf *out, case TARGET_NULL: prt_printf(out, "none"); break; - case TARGET_DEV: - if (c) { - struct bch_dev *ca; - - rcu_read_lock(); - ca = t.dev < c->sb.nr_devices - ? 
rcu_dereference(c->devs[t.dev]) - : NULL; - - if (ca && percpu_ref_tryget(&ca->io_ref)) { - prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); - percpu_ref_put(&ca->io_ref); - } else if (ca) { - prt_printf(out, "offline device %u", t.dev); - } else { - prt_printf(out, "invalid device %u", t.dev); - } - - rcu_read_unlock(); + case TARGET_DEV: { + struct bch_dev *ca; + + rcu_read_lock(); + ca = t.dev < c->sb.nr_devices + ? rcu_dereference(c->devs[t.dev]) + : NULL; + + if (ca && percpu_ref_tryget(&ca->io_ref)) { + prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); + percpu_ref_put(&ca->io_ref); + } else if (ca) { + prt_printf(out, "offline device %u", t.dev); } else { - struct bch_member m = bch2_sb_member_get(sb, t.dev); - - if (bch2_dev_exists(sb, t.dev)) { - prt_printf(out, "Device "); - pr_uuid(out, m.uuid.b); - prt_printf(out, " (%u)", t.dev); - } else { - prt_printf(out, "Bad device %u", t.dev); - } + prt_printf(out, "invalid device %u", t.dev); } + + rcu_read_unlock(); break; + } case TARGET_GROUP: - if (c) { - mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(out, c->disk_sb.sb, t.group); - mutex_unlock(&c->sb_lock); + mutex_lock(&c->sb_lock); + bch2_disk_path_to_text(out, c->disk_sb.sb, t.group); + mutex_unlock(&c->sb_lock); + break; + default: + BUG(); + } +} + +void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) +{ + struct target t = target_decode(v); + + switch (t.type) { + case TARGET_NULL: + prt_printf(out, "none"); + break; + case TARGET_DEV: { + struct bch_member m = bch2_sb_member_get(sb, t.dev); + + if (bch2_dev_exists(sb, t.dev)) { + prt_printf(out, "Device "); + pr_uuid(out, m.uuid.b); + prt_printf(out, " (%u)", t.dev); } else { - bch2_disk_path_to_text(out, sb, t.group); + prt_printf(out, "Bad device %u", t.dev); } break; + } + case TARGET_GROUP: + bch2_disk_path_to_text(out, sb, t.group); + break; default: BUG(); } } + +void bch2_opt_target_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_sb *sb, + u64 v) +{ + 
if (c) + bch2_target_to_text(out, c, v); + else + bch2_target_to_text_sb(out, sb, v); +} diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h index bd7711767fd4..ab17dbaaf842 100644 --- a/fs/bcachefs/disk_groups.h +++ b/fs/bcachefs/disk_groups.h @@ -84,6 +84,7 @@ int bch2_disk_path_find(struct bch_sb_handle *, const char *); int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned); +void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned); int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); -- cgit v1.2.3 From 37707bb183b4746f27b0beaf0c3273fd7c79dc66 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Oct 2023 10:58:38 -0400 Subject: bcachefs: Split out disk_groups_types.h Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/disk_groups.h | 2 ++ fs/bcachefs/disk_groups_types.h | 17 +++++++++++++++++ fs/bcachefs/super_types.h | 12 ------------ 4 files changed, 20 insertions(+), 12 deletions(-) create mode 100644 fs/bcachefs/disk_groups_types.h (limited to 'fs') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 9863571feebf..1e0191197de1 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -418,6 +418,7 @@ enum bch_time_stats { #include "buckets_types.h" #include "buckets_waiting_for_journal_types.h" #include "clock_types.h" +#include "disk_groups_types.h" #include "ec_types.h" #include "journal_types.h" #include "keylist_types.h" diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h index ab17dbaaf842..e03ccc7f13da 100644 --- a/fs/bcachefs/disk_groups.h +++ b/fs/bcachefs/disk_groups.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_DISK_GROUPS_H #define _BCACHEFS_DISK_GROUPS_H +#include "disk_groups_types.h" + extern const struct bch_sb_field_ops 
bch_sb_field_ops_disk_groups; static inline unsigned disk_groups_nr(struct bch_sb_field_disk_groups *groups) diff --git a/fs/bcachefs/disk_groups_types.h b/fs/bcachefs/disk_groups_types.h new file mode 100644 index 000000000000..55a67a4dca76 --- /dev/null +++ b/fs/bcachefs/disk_groups_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_DISK_GROUPS_TYPES_H +#define _BCACHEFS_DISK_GROUPS_TYPES_H + +struct bch_disk_group_cpu { + bool deleted; + u16 parent; + struct bch_devs_mask devs; +}; + +struct bch_disk_groups_cpu { + struct rcu_head rcu; + unsigned nr; + struct bch_disk_group_cpu entries[] __counted_by(nr); +}; + +#endif /* _BCACHEFS_DISK_GROUPS_TYPES_H */ diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index 78d6138db62d..7dda4985b99f 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -37,16 +37,4 @@ struct bch_member_cpu { u8 valid; }; -struct bch_disk_group_cpu { - bool deleted; - u16 parent; - struct bch_devs_mask devs; -}; - -struct bch_disk_groups_cpu { - struct rcu_head rcu; - unsigned nr; - struct bch_disk_group_cpu entries[] __counted_by(nr); -}; - #endif /* _BCACHEFS_SUPER_TYPES_H */ -- cgit v1.2.3 From e677179b35b7ecbe3cefe33011b69d45171e5e9f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Oct 2023 11:12:14 -0400 Subject: bcachefs: bch2_disk_path_to_text() no longer takes sb_lock We're going to be using bch2_target_to_text() -> bch2_disk_path_to_text() from bch2_bkey_ptrs_to_text() and bch2_bkey_ptrs_invalid(), which can be called in any context. This patch adds the actual label to bch_disk_group_cpu so that it can be used by bch2_disk_path_to_text, and splits out bch2_disk_path_to_text() into two variants - like the previous patch, one for when we have a running filesystem and another for when we only have a superblock. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_groups.c | 59 +++++++++++++++++++++++++++++++++++++---- fs/bcachefs/disk_groups.h | 4 ++- fs/bcachefs/disk_groups_types.h | 1 + fs/bcachefs/super.c | 2 +- fs/bcachefs/sysfs.c | 9 ++----- 5 files changed, 61 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 67a04fbbbbee..d613695abf9f 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -175,6 +175,7 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) dst->deleted = BCH_GROUP_DELETED(src); dst->parent = BCH_GROUP_PARENT(src); + memcpy(dst->label, src->label, sizeof(dst->label)); } for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { @@ -382,7 +383,57 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) return v; } -void bch2_disk_path_to_text(struct printbuf *out, struct bch_sb *sb, unsigned v) +void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) +{ + struct bch_disk_groups_cpu *groups; + struct bch_disk_group_cpu *g; + unsigned nr = 0; + u16 path[32]; + + out->atomic++; + rcu_read_lock(); + groups = rcu_dereference(c->disk_groups); + if (!groups) + goto invalid; + + while (1) { + if (nr == ARRAY_SIZE(path)) + goto invalid; + + if (v >= groups->nr) + goto invalid; + + g = groups->entries + v; + + if (g->deleted) + goto invalid; + + path[nr++] = v; + + if (!g->parent) + break; + + v = g->parent - 1; + } + + while (nr) { + v = path[--nr]; + g = groups->entries + v; + + prt_printf(out, "%.*s", (int) sizeof(g->label), g->label); + if (nr) + prt_printf(out, "."); + } +out: + rcu_read_unlock(); + out->atomic--; + return; +invalid: + prt_printf(out, "invalid label %u", v); + goto out; +} + +void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) { struct bch_sb_field_disk_groups *groups = bch2_sb_field_get(sb, disk_groups); @@ -522,9 +573,7 @@ void bch2_target_to_text(struct printbuf *out, struct 
bch_fs *c, unsigned v) break; } case TARGET_GROUP: - mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(out, c->disk_sb.sb, t.group); - mutex_unlock(&c->sb_lock); + bch2_disk_path_to_text(out, c, t.group); break; default: BUG(); @@ -552,7 +601,7 @@ void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) break; } case TARGET_GROUP: - bch2_disk_path_to_text(out, sb, t.group); + bch2_disk_path_to_text_sb(out, sb, t.group); break; default: BUG(); diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h index e03ccc7f13da..441826fff224 100644 --- a/fs/bcachefs/disk_groups.h +++ b/fs/bcachefs/disk_groups.h @@ -85,7 +85,9 @@ int bch2_disk_path_find(struct bch_sb_handle *, const char *); /* Exported for userspace bcachefs-tools: */ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); -void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned); +void bch2_disk_path_to_text(struct printbuf *, struct bch_fs *, unsigned); +void bch2_disk_path_to_text_sb(struct printbuf *, struct bch_sb *, unsigned); + void bch2_target_to_text(struct printbuf *out, struct bch_fs *, unsigned); int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); diff --git a/fs/bcachefs/disk_groups_types.h b/fs/bcachefs/disk_groups_types.h index 55a67a4dca76..a54ef085b13d 100644 --- a/fs/bcachefs/disk_groups_types.h +++ b/fs/bcachefs/disk_groups_types.h @@ -5,6 +5,7 @@ struct bch_disk_group_cpu { bool deleted; u16 parent; + u8 label[BCH_SB_LABEL_SIZE]; struct bch_devs_mask devs; }; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 9d59d6246ed6..ce59018b27ac 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1582,7 +1582,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx); if (BCH_MEMBER_GROUP(&dev_mi)) { - bch2_disk_path_to_text(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); + bch2_disk_path_to_text_sb(&label, sb.sb, 
BCH_MEMBER_GROUP(&dev_mi) - 1); if (label.allocation_failure) { ret = -ENOMEM; goto err; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 5b079369af95..3ac6634020d1 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -910,13 +910,8 @@ SHOW(bch2_dev) sysfs_print(discard, ca->mi.discard); if (attr == &sysfs_label) { - if (ca->mi.group) { - mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(out, c->disk_sb.sb, - ca->mi.group - 1); - mutex_unlock(&c->sb_lock); - } - + if (ca->mi.group) + bch2_disk_path_to_text(out, c, ca->mi.group - 1); prt_char(out, '\n'); } -- cgit v1.2.3 From 2d39081291470750cc605c917531d7cd85aebf94 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Oct 2023 11:19:34 -0400 Subject: bcachefs: Ensure we don't exceed encoded_extent_max The write path may (rarely) see an encoded (checksummed) extent that exceeds encoded_extent_max - this can happen when we're moving an existing extent that was not checksummed, but was given a checksum by bch2_write_rechecksum(). Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 6e4f85eb6ec8..4a666f4d2dcc 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -816,6 +816,7 @@ static enum prep_encoded_ret { /* Can we just write the entire extent as is? */ if (op->crc.uncompressed_size == op->crc.live_size && + op->crc.uncompressed_size <= c->opts.encoded_extent_max >> 9 && op->crc.compressed_size <= wp->sectors_free && (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || op->incompressible)) { -- cgit v1.2.3 From 9db2f86060a8e54e80f99e3c3366832ce6a67d76 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Oct 2023 11:33:02 -0400 Subject: bcachefs: Check for too-large encoded extents We don't yet repair (split) them, just check. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 8 ++++++++ fs/bcachefs/extents.h | 5 +++++ fs/bcachefs/fsck.c | 49 +++++++++++++++++++++++++++++++++++++++++++- fs/bcachefs/fsck.h | 1 + fs/bcachefs/io_write.c | 4 +--- fs/bcachefs/recovery_types.h | 1 + 6 files changed, 64 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 1b25f84e4b9c..38077b3886d7 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1207,6 +1207,14 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, return -BCH_ERR_invalid_bkey; } crc_since_last_ptr = true; + + if (crc_is_encoded(crc) && + (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && + (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT))) { + prt_printf(err, "too large encoded extent"); + return -BCH_ERR_invalid_bkey; + } + break; case BCH_EXTENT_ENTRY_stripe_ptr: if (have_ec) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index acf78f55bdff..ef1b9f18719d 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -190,6 +190,11 @@ static inline bool crc_is_compressed(struct bch_extent_crc_unpacked crc) crc.compression_type != BCH_COMPRESSION_TYPE_incompressible); } +static inline bool crc_is_encoded(struct bch_extent_crc_unpacked crc) +{ + return crc.csum_type != BCH_CSUM_none || crc_is_compressed(crc); +} + /* bkey_ptrs: generically over any key type that has ptrs */ struct bkey_ptrs_c { diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f26b824e70a8..328cb3b3e213 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1299,6 +1299,28 @@ err: return ret; } +static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; + unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; + + 
bkey_for_each_crc(k.k, ptrs, crc, i) + if (crc_is_encoded(crc) && + crc.uncompressed_size > encoded_extent_max_sectors) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, k); + bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); + printbuf_exit(&buf); + } + + return 0; +} + static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, struct inode_walker *inode, @@ -1434,7 +1456,8 @@ int bch2_check_extents(struct bch_fs *c) &res, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends); + check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: + check_extent_overbig(trans, &iter, k); })) ?: check_i_sectors(trans, &w); @@ -1448,6 +1471,30 @@ int bch2_check_extents(struct bch_fs *c) return ret; } +int bch2_check_indirect_extents(struct bch_fs *c) +{ + struct btree_trans *trans = bch2_trans_get(c); + struct btree_iter iter; + struct bkey_s_c k; + struct disk_reservation res = { 0 }; + int ret = 0; + + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, + POS_MIN, + BTREE_ITER_PREFETCH, k, + &res, NULL, + BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ + bch2_disk_reservation_put(c, &res); + check_extent_overbig(trans, &iter, k); + })); + + bch2_disk_reservation_put(c, &res); + bch2_trans_put(trans); + + bch_err_fn(c, ret); + return ret; +} + static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h index 90c87b5089a0..da991e8cf27e 100644 --- a/fs/bcachefs/fsck.h +++ b/fs/bcachefs/fsck.h @@ -4,6 +4,7 @@ int bch2_check_inodes(struct bch_fs *); int bch2_check_extents(struct bch_fs *); +int bch2_check_indirect_extents(struct bch_fs *); int bch2_check_dirents(struct bch_fs *); int bch2_check_xattrs(struct bch_fs *); int bch2_check_root(struct bch_fs *); diff --git a/fs/bcachefs/io_write.c 
b/fs/bcachefs/io_write.c index 4a666f4d2dcc..f7461f60d760 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1092,9 +1092,7 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, e = bkey_s_c_to_extent(k); extent_for_each_ptr_decode(e, p, entry) { - if (p.crc.csum_type || - crc_is_compressed(p.crc) || - p.has_ec) + if (crc_is_encoded(p.crc) || p.has_ec) return false; replicas += bch2_extent_ptr_durability(c, &p); diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index bf43e13c4560..e2d8771909ef 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -34,6 +34,7 @@ x(resume_logged_ops, PASS_ALWAYS) \ x(check_inodes, PASS_FSCK) \ x(check_extents, PASS_FSCK) \ + x(check_indirect_extents, PASS_FSCK) \ x(check_dirents, PASS_FSCK) \ x(check_xattrs, PASS_FSCK) \ x(check_root, PASS_FSCK) \ -- cgit v1.2.3 From 48f866e90f520855b6d941eebe46d75dbfbb9a81 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Oct 2023 17:22:53 -0400 Subject: bcachefs: Fix bch2_prt_bitflags() This fixes an infinite loop when there's a set bit at position >= 32. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/printbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index de41f9a14492..5e653eb81d54 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -415,11 +415,11 @@ void bch2_prt_bitflags(struct printbuf *out, while (list[nr]) nr++; - while (flags && (bit = __ffs(flags)) < nr) { + while (flags && (bit = __ffs64(flags)) < nr) { if (!first) bch2_prt_printf(out, ","); first = false; bch2_prt_printf(out, "%s", list[bit]); - flags ^= 1 << bit; + flags ^= BIT_ULL(bit); } } -- cgit v1.2.3 From ef435abd6a99206d9bb93462a1b0508e0d876adb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Oct 2023 15:06:27 -0400 Subject: bcachefs: trivial extents.c refactoring Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 38077b3886d7..ccb62fa22b04 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -992,10 +992,6 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - const struct bch_extent_stripe_ptr *ec; - struct bch_dev *ca; bool first = true; if (c) @@ -1006,9 +1002,9 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " "); switch (__extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); - ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] + case BCH_EXTENT_ENTRY_ptr: { + const struct bch_extent_ptr *ptr = entry_to_ptr(entry); + struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] ? 
bch_dev_bkey_exists(c, ptr->dev) : NULL; @@ -1030,10 +1026,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " stale"); } break; + } case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc64: - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); + case BCH_EXTENT_ENTRY_crc128: { + struct bch_extent_crc_unpacked crc = + bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s", crc.compressed_size, @@ -1042,12 +1040,14 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, bch2_csum_types[crc.csum_type], bch2_compression_types[crc.compression_type]); break; - case BCH_EXTENT_ENTRY_stripe_ptr: - ec = &entry->stripe_ptr; + } + case BCH_EXTENT_ENTRY_stripe_ptr: { + const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr; prt_printf(out, "ec: idx %llu block %u", (u64) ec->idx, ec->block); break; + } default: prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); return; -- cgit v1.2.3 From 6ddedca2180b095aacca0f628e0d03a32477f68f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Oct 2023 18:29:54 -0400 Subject: bcachefs: Guard against unknown compression options Since compression options now include compression level, proper validation is a bit more involved. This adds bch2_compression_opt_valid(), and plumbs it around appropriately. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/compress.c | 10 ++++++++++ fs/bcachefs/compress.h | 34 +++++++++++++++++++++++++--------- fs/bcachefs/errcode.h | 1 + fs/bcachefs/inode.c | 8 +++++--- fs/bcachefs/opts.c | 3 +++ fs/bcachefs/opts.h | 1 + 6 files changed, 45 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 1480b64547b0..0e3981f42526 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -708,3 +708,13 @@ void bch2_opt_compression_to_text(struct printbuf *out, if (opt.level) prt_printf(out, ":%u", opt.level); } + +int bch2_opt_compression_validate(u64 v, struct printbuf *err) +{ + if (!bch2_compression_opt_valid(v)) { + prt_printf(err, "invalid compression opt %llu", v); + return -BCH_ERR_invalid_sb_opt_compression; + } + + return 0; +} diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index 052ea303241f..b938fc936365 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -4,12 +4,18 @@ #include "extents_types.h" +static const unsigned __bch2_compression_opt_to_type[] = { +#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, + BCH_COMPRESSION_OPTS() +#undef x +}; + struct bch_compression_opt { u8 type:4, level:4; }; -static inline struct bch_compression_opt bch2_compression_decode(unsigned v) +static inline struct bch_compression_opt __bch2_compression_decode(unsigned v) { return (struct bch_compression_opt) { .type = v & 15, @@ -17,17 +23,25 @@ static inline struct bch_compression_opt bch2_compression_decode(unsigned v) }; } +static inline bool bch2_compression_opt_valid(unsigned v) +{ + struct bch_compression_opt opt = __bch2_compression_decode(v); + + return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level); +} + +static inline struct bch_compression_opt bch2_compression_decode(unsigned v) +{ + return bch2_compression_opt_valid(v) + ? 
__bch2_compression_decode(v) + : (struct bch_compression_opt) { 0 }; +} + static inline unsigned bch2_compression_encode(struct bch_compression_opt opt) { return opt.type|(opt.level << 4); } -static const unsigned __bch2_compression_opt_to_type[] = { -#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, - BCH_COMPRESSION_OPTS() -#undef x -}; - static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) { return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; @@ -46,10 +60,12 @@ int bch2_fs_compress_init(struct bch_fs *); int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); +int bch2_opt_compression_validate(u64, struct printbuf *); #define bch2_opt_compression (struct bch_opt_fn) { \ - .parse = bch2_opt_compression_parse, \ - .to_text = bch2_opt_compression_to_text, \ + .parse = bch2_opt_compression_parse, \ + .to_text = bch2_opt_compression_to_text, \ + .validate = bch2_opt_compression_validate, \ } #endif /* _BCACHEFS_COMPRESS_H */ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 7cc083776a2e..3e9f09cea6c7 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -213,6 +213,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \ x(BCH_ERR_invalid_sb, invalid_sb_clean) \ x(BCH_ERR_invalid_sb, invalid_sb_quota) \ + x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(EIO, btree_node_read_err) \ diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index bb3f443d8381..a3921c397ea2 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -6,6 +6,7 @@ #include "bkey_methods.h" #include "btree_update.h" #include "buckets.h" +#include "compress.h" #include "error.h" #include "extents.h" #include "extent_update.h" @@ -422,9 +423,10 @@ static int __bch2_inode_invalid(struct bkey_s_c 
k, struct printbuf *err) return -BCH_ERR_invalid_bkey; } - if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) { - prt_printf(err, "invalid data checksum type (%u >= %u)", - unpacked.bi_compression, BCH_COMPRESSION_OPT_NR + 1); + if (unpacked.bi_compression && + !bch2_compression_opt_valid(unpacked.bi_compression - 1)) { + prt_printf(err, "invalid compression opt %u", + unpacked.bi_compression - 1); return -BCH_ERR_invalid_bkey; } diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 8294f56e45d5..b7722b623697 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -294,6 +294,9 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) return -EINVAL; } + if (opt->fn.validate) + return opt->fn.validate(v, err); + return 0; } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 16dd0f0622bc..2307cdd2a23c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -74,6 +74,7 @@ enum opt_type { struct bch_opt_fn { int (*parse)(struct bch_fs *, const char *, u64 *, struct printbuf *); void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); + int (*validate)(u64, struct printbuf *); }; /** -- cgit v1.2.3 From a0bfe3b065cabc669933063cb5a9066b104be406 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Oct 2023 13:32:42 -0400 Subject: bcachefs: move.c exports, refactoring Prep work for the new rebalance code - we need a few helpers exported. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 119 ++++++++++++++++++++++++++---------------------- fs/bcachefs/move.h | 22 ++++++++- fs/bcachefs/rebalance.c | 3 +- 3 files changed, 85 insertions(+), 59 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 38b076ff1906..12167791e34c 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -171,8 +171,8 @@ void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt, } } -static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt, - struct btree_trans *trans) +void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt, + struct btree_trans *trans) { unsigned sectors_pending = atomic_read(&ctxt->write_sectors); @@ -287,14 +287,13 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans, bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); } -static int bch2_move_extent(struct btree_trans *trans, - struct btree_iter *iter, - struct moving_context *ctxt, - struct move_bucket_in_flight *bucket_in_flight, - struct bch_io_opts io_opts, - enum btree_id btree_id, - struct bkey_s_c k, - struct data_update_opts data_opts) +int bch2_move_extent(struct btree_trans *trans, + struct btree_iter *iter, + struct moving_context *ctxt, + struct move_bucket_in_flight *bucket_in_flight, + struct bch_io_opts io_opts, + struct bkey_s_c k, + struct data_update_opts data_opts) { struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -356,7 +355,7 @@ static int bch2_move_extent(struct btree_trans *trans, io->rbio.bio.bi_end_io = move_read_endio; ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp, - io_opts, data_opts, btree_id, k); + io_opts, data_opts, iter->btree_id, k); if (ret && ret != -BCH_ERR_unwritten_extent_update) goto err_free_pages; @@ -371,6 +370,9 @@ static int bch2_move_extent(struct btree_trans *trans, io->write.ctxt = ctxt; io->write.op.end_io = move_write_done; + if (ctxt->rate) + 
bch2_ratelimit_increment(ctxt->rate, k.k->size); + if (ctxt->stats) { atomic64_inc(&ctxt->stats->keys_moved); atomic64_add(k.k->size, &ctxt->stats->sectors_moved); @@ -400,7 +402,7 @@ static int bch2_move_extent(struct btree_trans *trans, closure_get(&ctxt->cl); bch2_read_extent(trans, &io->rbio, bkey_start_pos(k.k), - btree_id, k, 0, + iter->btree_id, k, 0, BCH_READ_NODECODE| BCH_READ_LAST_FRAGMENT); return 0; @@ -464,9 +466,9 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, return &io_opts->fs_io_opts; } -static int bch2_move_get_io_opts_one(struct btree_trans *trans, - struct bch_io_opts *io_opts, - struct bkey_s_c extent_k) +int bch2_move_get_io_opts_one(struct btree_trans *trans, + struct bch_io_opts *io_opts, + struct bkey_s_c extent_k) { struct btree_iter iter; struct bkey_s_c k; @@ -497,8 +499,8 @@ static int bch2_move_get_io_opts_one(struct btree_trans *trans, return 0; } -static int move_ratelimit(struct btree_trans *trans, - struct moving_context *ctxt) +int bch2_move_ratelimit(struct btree_trans *trans, + struct moving_context *ctxt) { struct bch_fs *c = trans->c; u64 delay; @@ -545,7 +547,8 @@ static int move_ratelimit(struct btree_trans *trans, return 0; } -static int __bch2_move_data(struct moving_context *ctxt, +static int bch2_move_data_btree(struct btree_trans *trans, + struct moving_context *ctxt, struct bpos start, struct bpos end, move_pred_fn pred, void *arg, @@ -555,7 +558,6 @@ static int __bch2_move_data(struct moving_context *ctxt, struct per_snapshot_io_opts snapshot_io_opts; struct bch_io_opts *io_opts; struct bkey_buf sk; - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct data_update_opts data_opts; @@ -577,7 +579,7 @@ static int __bch2_move_data(struct moving_context *ctxt, if (ctxt->rate) bch2_ratelimit_reset(ctxt->rate); - while (!move_ratelimit(trans, ctxt)) { + while (!bch2_move_ratelimit(trans, ctxt)) { bch2_trans_begin(trans); k = bch2_btree_iter_peek(&iter); 
@@ -616,7 +618,7 @@ static int __bch2_move_data(struct moving_context *ctxt, k = bkey_i_to_s_c(sk.k); ret2 = bch2_move_extent(trans, &iter, ctxt, NULL, - *io_opts, btree_id, k, data_opts); + *io_opts, k, data_opts); if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; @@ -630,9 +632,6 @@ static int __bch2_move_data(struct moving_context *ctxt, /* XXX signal failure */ goto next; } - - if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, k.k->size); next: if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); @@ -641,48 +640,60 @@ next_nondata: } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); per_snapshot_io_opts_exit(&snapshot_io_opts); return ret; } -int bch2_move_data(struct bch_fs *c, - enum btree_id start_btree_id, struct bpos start_pos, - enum btree_id end_btree_id, struct bpos end_pos, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc, - move_pred_fn pred, void *arg) +int __bch2_move_data(struct btree_trans *trans, + struct moving_context *ctxt, + struct bbpos start, + struct bbpos end, + move_pred_fn pred, void *arg) { - struct moving_context ctxt; + struct bch_fs *c = trans->c; enum btree_id id; int ret = 0; - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - - for (id = start_btree_id; - id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); + for (id = start.btree; + id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); id++) { - stats->btree_id = id; + ctxt->stats->btree_id = id; - if (id != BTREE_ID_extents && - id != BTREE_ID_reflink) + if (!btree_type_has_ptrs(id) || + !bch2_btree_id_root(c, id)->b) continue; - if (!bch2_btree_id_root(c, id)->b) - continue; - - ret = __bch2_move_data(&ctxt, - id == start_btree_id ? start_pos : POS_MIN, - id == end_btree_id ? end_pos : POS_MAX, + ret = bch2_move_data_btree(trans, ctxt, + id == start.btree ? 
start.pos : POS_MIN, + id == end.btree ? end.pos : POS_MAX, pred, arg, id); if (ret) break; } + return ret; +} + +int bch2_move_data(struct bch_fs *c, + struct bbpos start, + struct bbpos end, + struct bch_ratelimit *rate, + struct bch_move_stats *stats, + struct write_point_specifier wp, + bool wait_on_copygc, + move_pred_fn pred, void *arg) +{ + + struct btree_trans *trans; + struct moving_context ctxt; + int ret; + + bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); + trans = bch2_trans_get(c); + ret = __bch2_move_data(trans, &ctxt, start, end, pred, arg); + bch2_trans_put(trans); bch2_moving_ctxt_exit(&ctxt); return ret; @@ -739,7 +750,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, goto err; } - while (!(ret = move_ratelimit(trans, ctxt))) { + while (!(ret = bch2_move_ratelimit(trans, ctxt))) { bch2_trans_begin(trans); ret = bch2_get_next_backpointer(trans, bucket, gen, @@ -791,7 +802,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, ret = bch2_move_extent(trans, &iter, ctxt, bucket_in_flight, - io_opts, bp.btree_id, k, data_opts); + io_opts, k, data_opts); bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -804,8 +815,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, if (ret) goto err; - if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, k.k->size); if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); } else { @@ -1087,8 +1096,8 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_move_data(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + (struct bbpos) { op.start_btree, op.start_pos }, + (struct bbpos) { op.end_btree, op.end_pos }, NULL, stats, writepoint_hashed((unsigned long) current), @@ -1111,8 +1120,8 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_move_data(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + (struct bbpos) { op.start_btree, 
op.start_pos }, + (struct bbpos) { op.end_btree, op.end_pos }, NULL, stats, writepoint_hashed((unsigned long) current), diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index aa4b65c4f960..67ca13f7e772 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_MOVE_H #define _BCACHEFS_MOVE_H +#include "bbpos.h" #include "bcachefs_ioctl.h" #include "btree_iter.h" #include "buckets.h" @@ -61,6 +62,9 @@ void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *); void bch2_moving_ctxt_do_pending_writes(struct moving_context *, struct btree_trans *); +void bch2_move_ctxt_wait_for_io(struct moving_context *, + struct btree_trans *); +int bch2_move_ratelimit(struct btree_trans *, struct moving_context *); /* Inodes in different snapshots may have different IO options: */ struct snapshot_io_opts_entry { @@ -87,12 +91,26 @@ static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opt struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *, struct per_snapshot_io_opts *, struct bkey_s_c); +int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct bkey_s_c); int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); +int bch2_move_extent(struct btree_trans *, + struct btree_iter *, + struct moving_context *, + struct move_bucket_in_flight *, + struct bch_io_opts, + struct bkey_s_c, + struct data_update_opts); + +int __bch2_move_data(struct btree_trans *, + struct moving_context *, + struct bbpos, + struct bbpos, + move_pred_fn, void *); int bch2_move_data(struct bch_fs *, - enum btree_id, struct bpos, - enum btree_id, struct bpos, + struct bbpos start, + struct bbpos end, struct bch_ratelimit *, struct bch_move_stats *, struct write_point_specifier, diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 568f1e8e7507..92403fa79f1f 100644 --- a/fs/bcachefs/rebalance.c +++ 
b/fs/bcachefs/rebalance.c @@ -254,8 +254,7 @@ static int bch2_rebalance_thread(void *arg) rebalance_work_reset(c); bch2_move_data(c, - 0, POS_MIN, - BTREE_ID_NR, POS_MAX, + BBPOS_MIN, BBPOS_MAX, /* ratelimiting disabled for now */ NULL, /* &r->pd.rate, */ &move_stats, -- cgit v1.2.3 From 633169035a7ccdfe3a9eba0202dc2135baa07c72 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Oct 2023 13:32:42 -0400 Subject: bcachefs: moving_context now owns a btree_trans btree_trans and moving_context are used together, and having the moving_context own the transaction object reduces some plumbing. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 2 +- fs/bcachefs/move.c | 93 +++++++++++++++++++++-------------------------- fs/bcachefs/move.h | 27 ++++++-------- fs/bcachefs/movinggc.c | 36 ++++++++---------- 4 files changed, 70 insertions(+), 88 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 899ff46de8e0..9b42d37dc344 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -487,7 +487,7 @@ int bch2_data_update_init(struct btree_trans *trans, if (c->opts.nocow_enabled) { if (ctxt) { - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, PTR_BUCKET_POS(c, &p.ptr), 0)) || !atomic_read(&ctxt->read_sectors)); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 12167791e34c..570189eda6fd 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -157,13 +157,11 @@ static void move_read_endio(struct bio *bio) closure_put(&ctxt->cl); } -void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt, - struct btree_trans *trans) +void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt) { struct moving_io *io; - if (trans) - bch2_trans_unlock(trans); + bch2_trans_unlock(ctxt->trans); while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) { list_del(&io->read_list); @@ -171,21 +169,20 @@ void 
bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt, } } -void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt, - struct btree_trans *trans) +void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) { unsigned sectors_pending = atomic_read(&ctxt->write_sectors); - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, !atomic_read(&ctxt->write_sectors) || atomic_read(&ctxt->write_sectors) != sectors_pending); } void bch2_moving_ctxt_exit(struct moving_context *ctxt) { - struct bch_fs *c = ctxt->c; + struct bch_fs *c = ctxt->trans->c; - move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); closure_sync(&ctxt->cl); EBUG_ON(atomic_read(&ctxt->write_sectors)); @@ -203,6 +200,9 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) mutex_lock(&c->moving_context_lock); list_del(&ctxt->list); mutex_unlock(&c->moving_context_lock); + + bch2_trans_put(ctxt->trans); + memset(ctxt, 0, sizeof(*ctxt)); } void bch2_moving_ctxt_init(struct moving_context *ctxt, @@ -214,7 +214,7 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, { memset(ctxt, 0, sizeof(*ctxt)); - ctxt->c = c; + ctxt->trans = bch2_trans_get(c); ctxt->fn = (void *) _RET_IP_; ctxt->rate = rate; ctxt->stats = stats; @@ -287,14 +287,14 @@ static int bch2_extent_drop_ptrs(struct btree_trans *trans, bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); } -int bch2_move_extent(struct btree_trans *trans, - struct btree_iter *iter, - struct moving_context *ctxt, +int bch2_move_extent(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, - struct bch_io_opts io_opts, + struct btree_iter *iter, struct bkey_s_c k, + struct bch_io_opts io_opts, struct data_update_opts data_opts) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct moving_io *io; @@ -499,14 +499,13 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans, 
return 0; } -int bch2_move_ratelimit(struct btree_trans *trans, - struct moving_context *ctxt) +int bch2_move_ratelimit(struct moving_context *ctxt) { - struct bch_fs *c = trans->c; + struct bch_fs *c = ctxt->trans->c; u64 delay; if (ctxt->wait_on_copygc) { - bch2_trans_unlock(trans); + bch2_trans_unlock(ctxt->trans); wait_event_killable(c->copygc_running_wq, !c->copygc_running || kthread_should_stop()); @@ -516,7 +515,7 @@ int bch2_move_ratelimit(struct btree_trans *trans, delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0; if (delay) { - bch2_trans_unlock(trans); + bch2_trans_unlock(ctxt->trans); set_current_state(TASK_INTERRUPTIBLE); } @@ -529,7 +528,7 @@ int bch2_move_ratelimit(struct btree_trans *trans, schedule_timeout(delay); if (unlikely(freezing(current))) { - move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads)); + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); try_to_freeze(); } } while (delay); @@ -538,7 +537,7 @@ int bch2_move_ratelimit(struct btree_trans *trans, * XXX: these limits really ought to be per device, SSDs and hard drives * will want different limits */ - move_ctxt_wait_event(ctxt, trans, + move_ctxt_wait_event(ctxt, atomic_read(&ctxt->write_sectors) < c->opts.move_bytes_in_flight >> 9 && atomic_read(&ctxt->read_sectors) < c->opts.move_bytes_in_flight >> 9 && atomic_read(&ctxt->write_ios) < c->opts.move_ios_in_flight && @@ -547,14 +546,14 @@ int bch2_move_ratelimit(struct btree_trans *trans, return 0; } -static int bch2_move_data_btree(struct btree_trans *trans, - struct moving_context *ctxt, - struct bpos start, - struct bpos end, - move_pred_fn pred, void *arg, - enum btree_id btree_id) +static int bch2_move_data_btree(struct moving_context *ctxt, + struct bpos start, + struct bpos end, + move_pred_fn pred, void *arg, + enum btree_id btree_id) { - struct bch_fs *c = ctxt->c; + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; struct per_snapshot_io_opts snapshot_io_opts; struct bch_io_opts 
*io_opts; struct bkey_buf sk; @@ -579,7 +578,7 @@ static int bch2_move_data_btree(struct btree_trans *trans, if (ctxt->rate) bch2_ratelimit_reset(ctxt->rate); - while (!bch2_move_ratelimit(trans, ctxt)) { + while (!bch2_move_ratelimit(ctxt)) { bch2_trans_begin(trans); k = bch2_btree_iter_peek(&iter); @@ -617,15 +616,14 @@ static int bch2_move_data_btree(struct btree_trans *trans, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret2 = bch2_move_extent(trans, &iter, ctxt, NULL, - *io_opts, k, data_opts); + ret2 = bch2_move_extent(ctxt, NULL, &iter, k, *io_opts, data_opts); if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; if (ret2 == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, trans); + bch2_move_ctxt_wait_for_io(ctxt); continue; } @@ -646,13 +644,12 @@ next_nondata: return ret; } -int __bch2_move_data(struct btree_trans *trans, - struct moving_context *ctxt, +int __bch2_move_data(struct moving_context *ctxt, struct bbpos start, struct bbpos end, move_pred_fn pred, void *arg) { - struct bch_fs *c = trans->c; + struct bch_fs *c = ctxt->trans->c; enum btree_id id; int ret = 0; @@ -665,7 +662,7 @@ int __bch2_move_data(struct btree_trans *trans, !bch2_btree_id_root(c, id)->b) continue; - ret = bch2_move_data_btree(trans, ctxt, + ret = bch2_move_data_btree(ctxt, id == start.btree ? start.pos : POS_MIN, id == end.btree ? 
end.pos : POS_MAX, pred, arg, id); @@ -686,26 +683,23 @@ int bch2_move_data(struct bch_fs *c, move_pred_fn pred, void *arg) { - struct btree_trans *trans; struct moving_context ctxt; int ret; bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - trans = bch2_trans_get(c); - ret = __bch2_move_data(trans, &ctxt, start, end, pred, arg); - bch2_trans_put(trans); + ret = __bch2_move_data(&ctxt, start, end, pred, arg); bch2_moving_ctxt_exit(&ctxt); return ret; } -int __bch2_evacuate_bucket(struct btree_trans *trans, - struct moving_context *ctxt, +int __bch2_evacuate_bucket(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, struct bpos bucket, int gen, struct data_update_opts _data_opts) { - struct bch_fs *c = ctxt->c; + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_iter iter; struct bkey_buf sk; @@ -750,7 +744,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, goto err; } - while (!(ret = bch2_move_ratelimit(trans, ctxt))) { + while (!(ret = bch2_move_ratelimit(ctxt))) { bch2_trans_begin(trans); ret = bch2_get_next_backpointer(trans, bucket, gen, @@ -800,16 +794,15 @@ int __bch2_evacuate_bucket(struct btree_trans *trans, i++; } - ret = bch2_move_extent(trans, &iter, ctxt, - bucket_in_flight, - io_opts, k, data_opts); + ret = bch2_move_extent(ctxt, bucket_in_flight, + &iter, k, io_opts, data_opts); bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, trans); + bch2_move_ctxt_wait_for_io(ctxt); continue; } if (ret) @@ -865,14 +858,12 @@ int bch2_evacuate_bucket(struct bch_fs *c, struct write_point_specifier wp, bool wait_on_copygc) { - struct btree_trans *trans = bch2_trans_get(c); struct moving_context ctxt; int ret; bch2_moving_ctxt_init(&ctxt, c, rate, stats, 
wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(trans, &ctxt, NULL, bucket, gen, data_opts); + ret = __bch2_evacuate_bucket(&ctxt, NULL, bucket, gen, data_opts); bch2_moving_ctxt_exit(&ctxt); - bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 67ca13f7e772..39e762b103ca 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -12,7 +12,7 @@ struct bch_read_bio; struct moving_context { - struct bch_fs *c; + struct btree_trans *trans; struct list_head list; void *fn; @@ -38,10 +38,10 @@ struct moving_context { wait_queue_head_t wait; }; -#define move_ctxt_wait_event(_ctxt, _trans, _cond) \ +#define move_ctxt_wait_event(_ctxt, _cond) \ do { \ bool cond_finished = false; \ - bch2_moving_ctxt_do_pending_writes(_ctxt, _trans); \ + bch2_moving_ctxt_do_pending_writes(_ctxt); \ \ if (_cond) \ break; \ @@ -60,11 +60,9 @@ void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, struct bch_ratelimit *, struct bch_move_stats *, struct write_point_specifier, bool); struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *); -void bch2_moving_ctxt_do_pending_writes(struct moving_context *, - struct btree_trans *); -void bch2_move_ctxt_wait_for_io(struct moving_context *, - struct btree_trans *); -int bch2_move_ratelimit(struct btree_trans *, struct moving_context *); +void bch2_moving_ctxt_do_pending_writes(struct moving_context *); +void bch2_move_ctxt_wait_for_io(struct moving_context *); +int bch2_move_ratelimit(struct moving_context *); /* Inodes in different snapshots may have different IO options: */ struct snapshot_io_opts_entry { @@ -95,16 +93,14 @@ int bch2_move_get_io_opts_one(struct btree_trans *, struct bch_io_opts *, struct int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *); -int bch2_move_extent(struct btree_trans *, - struct btree_iter *, - struct moving_context *, +int bch2_move_extent(struct moving_context *, struct move_bucket_in_flight *, - struct 
bch_io_opts, + struct btree_iter *, struct bkey_s_c, + struct bch_io_opts, struct data_update_opts); -int __bch2_move_data(struct btree_trans *, - struct moving_context *, +int __bch2_move_data(struct moving_context *, struct bbpos, struct bbpos, move_pred_fn, void *); @@ -117,8 +113,7 @@ int bch2_move_data(struct bch_fs *, bool, move_pred_fn, void *); -int __bch2_evacuate_bucket(struct btree_trans *, - struct moving_context *, +int __bch2_evacuate_bucket(struct moving_context *, struct move_bucket_in_flight *, struct bpos, int, struct data_update_opts); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 4017120baeee..a2862e322658 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -101,8 +101,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, return ret; } -static void move_buckets_wait(struct btree_trans *trans, - struct moving_context *ctxt, +static void move_buckets_wait(struct moving_context *ctxt, struct buckets_in_flight *list, bool flush) { @@ -111,7 +110,7 @@ static void move_buckets_wait(struct btree_trans *trans, while ((i = list->first)) { if (flush) - move_ctxt_wait_event(ctxt, trans, !atomic_read(&i->count)); + move_ctxt_wait_event(ctxt, !atomic_read(&i->count)); if (atomic_read(&i->count)) break; @@ -129,7 +128,7 @@ static void move_buckets_wait(struct btree_trans *trans, kfree(i); } - bch2_trans_unlock(trans); + bch2_trans_unlock(ctxt->trans); } static bool bucket_in_flight(struct buckets_in_flight *list, @@ -140,11 +139,11 @@ static bool bucket_in_flight(struct buckets_in_flight *list, typedef DARRAY(struct move_bucket) move_buckets; -static int bch2_copygc_get_buckets(struct btree_trans *trans, - struct moving_context *ctxt, +static int bch2_copygc_get_buckets(struct moving_context *ctxt, struct buckets_in_flight *buckets_in_flight, move_buckets *buckets) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; @@ -152,7 +151,7 @@ static int 
bch2_copygc_get_buckets(struct btree_trans *trans, size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0; int ret; - move_buckets_wait(trans, ctxt, buckets_in_flight, false); + move_buckets_wait(ctxt, buckets_in_flight, false); ret = bch2_btree_write_buffer_flush(trans); if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()", @@ -188,10 +187,10 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans, } noinline -static int bch2_copygc(struct btree_trans *trans, - struct moving_context *ctxt, +static int bch2_copygc(struct moving_context *ctxt, struct buckets_in_flight *buckets_in_flight) { + struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; struct data_update_opts data_opts = { .btree_insert_flags = BCH_WATERMARK_copygc, @@ -202,7 +201,7 @@ static int bch2_copygc(struct btree_trans *trans, u64 moved = atomic64_read(&ctxt->stats->sectors_moved); int ret = 0; - ret = bch2_copygc_get_buckets(trans, ctxt, buckets_in_flight, &buckets); + ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets); if (ret) goto err; @@ -221,7 +220,7 @@ static int bch2_copygc(struct btree_trans *trans, break; } - ret = __bch2_evacuate_bucket(trans, ctxt, f, f->bucket.k.bucket, + ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket, f->bucket.k.gen, data_opts); if (ret) goto err; @@ -300,7 +299,6 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) static int bch2_copygc_thread(void *arg) { struct bch_fs *c = arg; - struct btree_trans *trans; struct moving_context ctxt; struct bch_move_stats move_stats; struct io_clock *clock = &c->io_clock[WRITE]; @@ -317,7 +315,6 @@ static int bch2_copygc_thread(void *arg) } set_freezable(); - trans = bch2_trans_get(c); bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, @@ -325,16 +322,16 @@ static int bch2_copygc_thread(void *arg) false); while (!ret && !kthread_should_stop()) { - bch2_trans_unlock(trans); + 
bch2_trans_unlock(ctxt.trans); cond_resched(); if (!c->copy_gc_enabled) { - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, &buckets, true); kthread_wait_freezable(c->copy_gc_enabled); } if (unlikely(freezing(current))) { - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, &buckets, true); __refrigerator(false); continue; } @@ -345,7 +342,7 @@ static int bch2_copygc_thread(void *arg) if (wait > clock->max_slop) { c->copygc_wait_at = last; c->copygc_wait = last + wait; - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, &buckets, true); trace_and_count(c, copygc_wait, c, wait, last + wait); bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT); @@ -355,15 +352,14 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; - ret = bch2_copygc(trans, &ctxt, &buckets); + ret = bch2_copygc(&ctxt, &buckets); c->copygc_running = false; wake_up(&c->copygc_running_wq); } - move_buckets_wait(trans, &ctxt, &buckets, true); + move_buckets_wait(&ctxt, &buckets, true); rhashtable_destroy(&buckets.table); - bch2_trans_put(trans); bch2_moving_ctxt_exit(&ctxt); return 0; -- cgit v1.2.3 From d5eade93452bd1a892e2155e9bb723f04992bdac Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Oct 2023 15:36:45 -0400 Subject: bcachefs: move: convert to bbpos Signed-off-by: Kent Overstreet --- fs/bcachefs/bbpos.h | 14 +------------- fs/bcachefs/bbpos_types.h | 18 ++++++++++++++++++ fs/bcachefs/chardev.c | 4 ++-- fs/bcachefs/data_update.c | 8 +++++--- fs/bcachefs/data_update.h | 1 + fs/bcachefs/move.c | 19 ++++++++----------- fs/bcachefs/move_types.h | 5 +++-- 7 files changed, 38 insertions(+), 31 deletions(-) create mode 100644 fs/bcachefs/bbpos_types.h (limited to 'fs') diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h index 0038bc28ba8c..be2edced5213 100644 --- a/fs/bcachefs/bbpos.h +++ b/fs/bcachefs/bbpos.h @@ -2,22 +2,10 @@ #ifndef _BCACHEFS_BBPOS_H 
#define _BCACHEFS_BBPOS_H +#include "bbpos_types.h" #include "bkey_methods.h" #include "btree_cache.h" -struct bbpos { - enum btree_id btree; - struct bpos pos; -}; - -static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos) -{ - return (struct bbpos) { btree, pos }; -} - -#define BBPOS_MIN BBPOS(0, POS_MIN) -#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX) - static inline int bbpos_cmp(struct bbpos l, struct bbpos r) { return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos); diff --git a/fs/bcachefs/bbpos_types.h b/fs/bcachefs/bbpos_types.h new file mode 100644 index 000000000000..5198e94cf3b8 --- /dev/null +++ b/fs/bcachefs/bbpos_types.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BBPOS_TYPES_H +#define _BCACHEFS_BBPOS_TYPES_H + +struct bbpos { + enum btree_id btree; + struct bpos pos; +}; + +static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos) +{ + return (struct bbpos) { btree, pos }; +} + +#define BBPOS_MIN BBPOS(0, POS_MIN) +#define BBPOS_MAX BBPOS(BTREE_ID_NR - 1, POS_MAX) + +#endif /* _BCACHEFS_BBPOS_TYPES_H */ diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index f69e15dc699c..4bb88aefed12 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -332,8 +332,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, struct bch_ioctl_data_event e = { .type = BCH_DATA_EVENT_PROGRESS, .p.data_type = ctx->stats.data_type, - .p.btree_id = ctx->stats.btree_id, - .p.pos = ctx->stats.pos, + .p.btree_id = ctx->stats.pos.btree, + .p.pos = ctx->stats.pos.pos, .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), .p.sectors_total = bch2_fs_usage_read_short(c).used, }; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 9b42d37dc344..e445c441764c 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -281,11 +281,11 @@ next: } continue; nowork: - if (m->ctxt && m->ctxt->stats) { + if (m->stats && m->stats) { 
BUG_ON(k.k->p.offset <= iter.pos.offset); - atomic64_inc(&m->ctxt->stats->keys_raced); + atomic64_inc(&m->stats->keys_raced); atomic64_add(k.k->p.offset - iter.pos.offset, - &m->ctxt->stats->sectors_raced); + &m->stats->sectors_raced); } this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]); @@ -439,6 +439,8 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; m->data_opts = data_opts; + m->ctxt = ctxt; + m->stats = ctxt->stats; bch2_write_op_init(&m->op, c, io_opts); m->op.pos = bkey_start_pos(k.k); diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 7ca1f98d7e94..9dc17b9d8379 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -23,6 +23,7 @@ struct data_update { struct bkey_buf k; struct data_update_opts data_opts; struct moving_context *ctxt; + struct bch_move_stats *stats; struct bch_write_op op; }; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 570189eda6fd..753755a627d5 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -367,7 +367,6 @@ int bch2_move_extent(struct moving_context *ctxt, BUG_ON(ret); - io->write.ctxt = ctxt; io->write.op.end_io = move_write_done; if (ctxt->rate) @@ -567,8 +566,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, if (ctxt->stats) { ctxt->stats->data_type = BCH_DATA_user; - ctxt->stats->btree_id = btree_id; - ctxt->stats->pos = start; + ctxt->stats->pos = BBPOS(btree_id, start); } bch2_trans_iter_init(trans, &iter, btree_id, start, @@ -595,7 +593,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, break; if (ctxt->stats) - ctxt->stats->pos = iter.pos; + ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos); if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; @@ -656,7 +654,7 @@ int __bch2_move_data(struct moving_context *ctxt, for (id = start.btree; id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); id++) { - ctxt->stats->btree_id = id; + ctxt->stats->pos = 
BBPOS(id, POS_MIN); if (!btree_type_has_ptrs(id) || !bch2_btree_id_root(c, id)->b) @@ -894,7 +892,7 @@ static int bch2_move_btree(struct bch_fs *c, for (id = start_btree_id; id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); id++) { - stats->btree_id = id; + stats->pos = BBPOS(id, POS_MIN); if (!bch2_btree_id_root(c, id)->b) continue; @@ -913,7 +911,7 @@ retry: bpos_cmp(b->key.k.p, end_pos)) > 0) break; - stats->pos = iter.pos; + stats->pos = BBPOS(iter.btree_id, iter.pos); if (!pred(c, arg, b, &io_opts, &data_opts)) goto next; @@ -1139,10 +1137,9 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str prt_printf(out, "%s (%ps):", stats->name, ctxt->fn); prt_newline(out); - prt_printf(out, " data type %s btree_id %s position: ", - bch2_data_types[stats->data_type], - bch2_btree_id_str(stats->btree_id)); - bch2_bpos_to_text(out, stats->pos); + prt_printf(out, " data type %s position: ", + bch2_data_types[stats->data_type]); + bch2_bbpos_to_text(out, stats->pos); prt_newline(out); printbuf_indent_add(out, 2); diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h index baf1f8570b3f..f402aa179bbe 100644 --- a/fs/bcachefs/move_types.h +++ b/fs/bcachefs/move_types.h @@ -2,10 +2,11 @@ #ifndef _BCACHEFS_MOVE_TYPES_H #define _BCACHEFS_MOVE_TYPES_H +#include "bbpos_types.h" + struct bch_move_stats { enum bch_data_type data_type; - enum btree_id btree_id; - struct bpos pos; + struct bbpos pos; struct list_head list; char name[32]; -- cgit v1.2.3 From 96a363a7e68832054f2a93249335fd3efd870aa3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Oct 2023 16:21:54 -0400 Subject: bcachefs: move: move_stats refactoring data_progress_list is gone - it was redundant with moving_context_list The upcoming rebalance rewrite is going to have it using two different move_stats objects with the same moving_context, depending on whether it's scanning or using the rebalance_work btree - this patch plumbs stats around a bit differently 
so that will work. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 3 -- fs/bcachefs/data_update.c | 2 +- fs/bcachefs/move.c | 98 +++++++++++++++++++++++++---------------------- fs/bcachefs/move.h | 5 ++- fs/bcachefs/move_types.h | 3 +- fs/bcachefs/movinggc.c | 1 + fs/bcachefs/trace.c | 1 + fs/bcachefs/trace.h | 31 ++++++++++----- 8 files changed, 82 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 1e0191197de1..bff6324447e1 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -939,9 +939,6 @@ struct bch_fs { struct list_head moving_context_list; struct mutex moving_context_lock; - struct list_head data_progress_list; - struct mutex data_progress_lock; - /* REBALANCE */ struct bch_fs_rebalance rebalance; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index e445c441764c..4860f8293a4f 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -440,7 +440,7 @@ int bch2_data_update_init(struct btree_trans *trans, m->btree_id = btree_id; m->data_opts = data_opts; m->ctxt = ctxt; - m->stats = ctxt->stats; + m->stats = ctxt ? 
ctxt->stats : NULL; bch2_write_op_init(&m->op, c, io_opts); m->op.pos = bkey_start_pos(k.k); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 753755a627d5..1b15b010461a 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -60,20 +60,6 @@ static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c } } -static void progress_list_add(struct bch_fs *c, struct bch_move_stats *stats) -{ - mutex_lock(&c->data_progress_lock); - list_add(&stats->list, &c->data_progress_list); - mutex_unlock(&c->data_progress_lock); -} - -static void progress_list_del(struct bch_fs *c, struct bch_move_stats *stats) -{ - mutex_lock(&c->data_progress_lock); - list_del(&stats->list); - mutex_unlock(&c->data_progress_lock); -} - struct moving_io { struct list_head read_list; struct list_head io_list; @@ -190,13 +176,6 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) EBUG_ON(atomic_read(&ctxt->read_sectors)); EBUG_ON(atomic_read(&ctxt->read_ios)); - if (ctxt->stats) { - progress_list_del(c, ctxt->stats); - trace_move_data(c, - atomic64_read(&ctxt->stats->sectors_moved), - atomic64_read(&ctxt->stats->keys_moved)); - } - mutex_lock(&c->moving_context_lock); list_del(&ctxt->list); mutex_unlock(&c->moving_context_lock); @@ -231,16 +210,17 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, mutex_lock(&c->moving_context_lock); list_add(&ctxt->list, &c->moving_context_list); mutex_unlock(&c->moving_context_lock); +} - if (stats) { - progress_list_add(c, stats); - stats->data_type = BCH_DATA_user; - } +void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) +{ + trace_move_data(c, stats); } void bch2_move_stats_init(struct bch_move_stats *stats, char *name) { memset(stats, 0, sizeof(*stats)); + stats->data_type = BCH_DATA_user; scnprintf(stats->name, sizeof(stats->name), "%s", name); } @@ -303,6 +283,8 @@ int bch2_move_extent(struct moving_context *ctxt, unsigned sectors = k.k->size, pages; int ret = -ENOMEM; + if (ctxt->stats) + 
ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); trace_move_extent2(c, k); bch2_data_update_opts_normalize(k, &data_opts); @@ -878,14 +860,18 @@ static int bch2_move_btree(struct bch_fs *c, { bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - struct btree_trans *trans = bch2_trans_get(c); + struct moving_context ctxt; + struct btree_trans *trans; struct btree_iter iter; struct btree *b; enum btree_id id; struct data_update_opts data_opts; int ret = 0; - progress_list_add(c, stats); + bch2_moving_ctxt_init(&ctxt, c, NULL, stats, + writepoint_ptr(&c->btree_write_point), + true); + trans = ctxt.trans; stats->data_type = BCH_DATA_btree; @@ -933,14 +919,10 @@ next: break; } - bch2_trans_put(trans); - - if (ret) - bch_err_fn(c, ret); - + bch_err_fn(c, ret); + bch2_moving_ctxt_exit(&ctxt); bch2_btree_interior_updates_flush(c); - progress_list_del(c, stats); return ret; } @@ -1061,8 +1043,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) mutex_unlock(&c->sb_lock); } - if (ret) - bch_err_fn(c, ret); + bch_err_fn(c, ret); return ret; } @@ -1093,6 +1074,8 @@ int bch2_data_job(struct bch_fs *c, true, rereplicate_pred, c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; + + bch2_move_stats_exit(stats, c); break; case BCH_DATA_OP_MIGRATE: if (op.migrate.dev >= c->sb.nr_devices) @@ -1117,10 +1100,13 @@ int bch2_data_job(struct bch_fs *c, true, migrate_pred, &op) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; + + bch2_move_stats_exit(stats, c); break; case BCH_DATA_OP_REWRITE_OLD_NODES: bch2_move_stats_init(stats, "rewrite_old_nodes"); ret = bch2_scan_old_btree_nodes(c, stats); + bch2_move_stats_exit(stats, c); break; default: ret = -EINVAL; @@ -1129,18 +1115,43 @@ int bch2_data_job(struct bch_fs *c, return ret; } -static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) +void bch2_move_stats_to_text(struct printbuf *out, struct 
bch_move_stats *stats) { - struct bch_move_stats *stats = ctxt->stats; - struct moving_io *io; + prt_printf(out, "%s: data type=%s pos=", + stats->name, + bch2_data_types[stats->data_type]); + bch2_bbpos_to_text(out, stats->pos); + prt_newline(out); + printbuf_indent_add(out, 2); - prt_printf(out, "%s (%ps):", stats->name, ctxt->fn); + prt_str(out, "keys moved: "); + prt_u64(out, atomic64_read(&stats->keys_moved)); prt_newline(out); - prt_printf(out, " data type %s position: ", - bch2_data_types[stats->data_type]); - bch2_bbpos_to_text(out, stats->pos); + prt_str(out, "keys raced: "); + prt_u64(out, atomic64_read(&stats->keys_raced)); + prt_newline(out); + + prt_str(out, "bytes seen: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_seen) << 9); + prt_newline(out); + + prt_str(out, "bytes moved: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_moved) << 9); prt_newline(out); + + prt_str(out, "bytes raced: "); + prt_human_readable_u64(out, atomic64_read(&stats->sectors_raced) << 9); + prt_newline(out); + + printbuf_indent_sub(out, 2); +} + +static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, struct moving_context *ctxt) +{ + struct moving_io *io; + + bch2_move_stats_to_text(out, ctxt->stats); printbuf_indent_add(out, 2); prt_printf(out, "reads: ios %u/%u sectors %u/%u", @@ -1181,7 +1192,4 @@ void bch2_fs_move_init(struct bch_fs *c) { INIT_LIST_HEAD(&c->moving_context_list); mutex_init(&c->moving_context_lock); - - INIT_LIST_HEAD(&c->data_progress_list); - mutex_init(&c->data_progress_lock); } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 39e762b103ca..1b1e8678bfae 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -127,7 +127,10 @@ int bch2_data_job(struct bch_fs *, struct bch_move_stats *, struct bch_ioctl_data); -void bch2_move_stats_init(struct bch_move_stats *stats, char *name); +void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *); +void bch2_move_stats_exit(struct 
bch_move_stats *, struct bch_fs *); +void bch2_move_stats_init(struct bch_move_stats *, char *); + void bch2_fs_moving_ctxts_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_move_init(struct bch_fs *); diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h index f402aa179bbe..e22841ef31e4 100644 --- a/fs/bcachefs/move_types.h +++ b/fs/bcachefs/move_types.h @@ -7,13 +7,12 @@ struct bch_move_stats { enum bch_data_type data_type; struct bbpos pos; - struct list_head list; char name[32]; atomic64_t keys_moved; atomic64_t keys_raced; - atomic64_t sectors_moved; atomic64_t sectors_seen; + atomic64_t sectors_moved; atomic64_t sectors_raced; }; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index a2862e322658..f73b9b7f4bf7 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -361,6 +361,7 @@ static int bch2_copygc_thread(void *arg) move_buckets_wait(&ctxt, &buckets, true); rhashtable_destroy(&buckets.table); bch2_moving_ctxt_exit(&ctxt); + bch2_move_stats_exit(&move_stats, c); return 0; } diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index 33efa6005c6f..dc48b52b01b4 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -7,6 +7,7 @@ #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" +#include "move_types.h" #include "opts.h" #include "six.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 2308f49f3b2e..81f72b2add09 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -767,25 +767,36 @@ DEFINE_EVENT(bkey, move_extent_alloc_mem_fail, ); TRACE_EVENT(move_data, - TP_PROTO(struct bch_fs *c, u64 sectors_moved, - u64 keys_moved), - TP_ARGS(c, sectors_moved, keys_moved), + TP_PROTO(struct bch_fs *c, + struct bch_move_stats *stats), + TP_ARGS(c, stats), TP_STRUCT__entry( - __field(dev_t, dev ) - __field(u64, sectors_moved ) + __field(dev_t, dev ) __field(u64, keys_moved ) + __field(u64, keys_raced ) + __field(u64, sectors_seen ) + __field(u64, sectors_moved ) + 
__field(u64, sectors_raced ) ), TP_fast_assign( - __entry->dev = c->dev; - __entry->sectors_moved = sectors_moved; - __entry->keys_moved = keys_moved; + __entry->dev = c->dev; + __entry->keys_moved = atomic64_read(&stats->keys_moved); + __entry->keys_raced = atomic64_read(&stats->keys_raced); + __entry->sectors_seen = atomic64_read(&stats->sectors_seen); + __entry->sectors_moved = atomic64_read(&stats->sectors_moved); + __entry->sectors_raced = atomic64_read(&stats->sectors_raced); ), - TP_printk("%d,%d sectors_moved %llu keys_moved %llu", + TP_printk("%d,%d keys moved %llu raced %llu" + "sectors seen %llu moved %llu raced %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->sectors_moved, __entry->keys_moved) + __entry->keys_moved, + __entry->keys_raced, + __entry->sectors_seen, + __entry->sectors_moved, + __entry->sectors_raced) ); TRACE_EVENT(evacuate_bucket, -- cgit v1.2.3 From 55c11a159d3ca4ca7f9d5c1275d0768474b12195 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Oct 2023 14:05:31 -0400 Subject: bcachefs: bch2_inum_opts_get() New helper for new rebalance code Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 12 ++++++++++++ fs/bcachefs/inode.h | 1 + 2 files changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index a3921c397ea2..23fcd442c514 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -981,6 +981,18 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, opts->compression = opts->background_compression = opts->data_checksum = opts->erasure_code = 0; } +int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_io_opts *opts) +{ + struct bch_inode_unpacked inode; + int ret = lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, &inode)); + + if (ret) + return ret; + + bch2_inode_opts_get(opts, trans->c, &inode); + return 0; +} + int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct bch_fs *c 
= trans->c; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index a7464e1b6960..2781e3281583 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -200,6 +200,7 @@ void bch2_inode_nlink_dec(struct btree_trans *, struct bch_inode_unpacked *); struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); +int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); int bch2_delete_dead_inodes(struct bch_fs *); -- cgit v1.2.3 From fb3f57bb1177ae4d5550bbb431f90ebf277329e8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Oct 2023 13:33:14 -0400 Subject: bcachefs: rebalance_work This adds a new btree, rebalance_work, to eliminate scanning required for finding extents that need work done on them in the background - i.e. for the background_target and background_compression options. rebalance_work is a bitset btree, where a KEY_TYPE_set corresponds to an extent in the extents or reflink btree at the same pos. A new extent field is added, bch_extent_rebalance, which indicates that this extent has work that needs to be done in the background - and which options to use. This allows per-inode options to be propagated to indirect extents - at least in some circumstances. In this patch, changing IO options on a file will not propagate the new options to indirect extents pointed to by that file. Updating (setting/clearing) the rebalance_work btree is done by the extent trigger, which looks at the bch_extent_rebalance field. Scanning is still required after changing IO path options - either just for a given inode, or for the whole filesystem.
We indicate that scanning is required by adding a KEY_TYPE_cookie key to the rebalance_work btree: the cookie counter is so that we can detect that scanning is still required when an option has been flipped mid-way through an existing scan. Future possible work: - Propagate options to indirect extents when being changed - Add other IO path options - nr_replicas, ec, to rebalance_work so they can be applied in the background when they change - Add a counter, for bcachefs fs usage output, showing the pending amount of rebalance work: we'll probably want to do this after the disk space accounting rewrite (moving it to a new btree) Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/bcachefs_format.h | 34 +-- fs/bcachefs/buckets.c | 10 + fs/bcachefs/compress.c | 18 +- fs/bcachefs/compress.h | 2 + fs/bcachefs/data_update.c | 11 +- fs/bcachefs/extents.c | 155 +++++++++++- fs/bcachefs/extents.h | 20 ++ fs/bcachefs/io_misc.c | 11 +- fs/bcachefs/io_write.c | 20 +- fs/bcachefs/rebalance.c | 553 +++++++++++++++++++++++++----------------- fs/bcachefs/rebalance.h | 9 +- fs/bcachefs/rebalance_types.h | 31 ++- fs/bcachefs/recovery.c | 1 + fs/bcachefs/recovery_types.h | 1 + fs/bcachefs/reflink.c | 21 +- fs/bcachefs/sysfs.c | 14 +- fs/bcachefs/xattr.c | 2 +- 18 files changed, 599 insertions(+), 315 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index bff6324447e1..68f0ff03c28a 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -464,6 +464,7 @@ enum gc_phase { GC_PHASE_BTREE_snapshot_trees, GC_PHASE_BTREE_deleted_inodes, GC_PHASE_BTREE_logged_ops, + GC_PHASE_BTREE_rebalance_work, GC_PHASE_PENDING_DELETE, }; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 99749f3315fe..e04999c57892 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -613,31 +613,17 @@ struct bch_extent_stripe_ptr { #endif }; -struct bch_extent_reservation { -#if 
defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:6, - unused:22, - replicas:4, - generation:32; -#elif defined (__BIG_ENDIAN_BITFIELD) - __u64 generation:32, - replicas:4, - unused:22, - type:6; -#endif -}; - struct bch_extent_rebalance { #if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 type:7, - unused:33, - compression:8, + __u64 type:6, + unused:34, + compression:8, /* enum bch_compression_opt */ target:16; #elif defined (__BIG_ENDIAN_BITFIELD) __u64 target:16, compression:8, - unused:33, - type:7; + unused:34, + type:6; #endif }; @@ -1682,7 +1668,9 @@ struct bch_sb_field_journal_seq_blacklist { x(snapshot_skiplists, BCH_VERSION(1, 1), \ BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ x(deleted_inodes, BCH_VERSION(1, 2), \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) + BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \ + x(rebalance_work, BCH_VERSION(1, 3), \ + BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -1693,7 +1681,7 @@ enum bcachefs_metadata_version { }; static const __maybe_unused -unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor; +unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) @@ -2306,7 +2294,9 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_set)) \ x(logged_ops, 17, 0, \ BIT_ULL(KEY_TYPE_logged_op_truncate)| \ - BIT_ULL(KEY_TYPE_logged_op_finsert)) + BIT_ULL(KEY_TYPE_logged_op_finsert)) \ + x(rebalance_work, 18, BTREE_ID_SNAPSHOTS, \ + BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) enum btree_id { #define x(name, nr, ...) 
BTREE_ID_##name = nr, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 5c1eca183243..a8af803e7289 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1536,6 +1536,16 @@ int bch2_trans_mark_extent(struct btree_trans *trans, struct bkey_s_c old, struct bkey_i *new, unsigned flags) { + struct bch_fs *c = trans->c; + int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) - + (int) bch2_bkey_needs_rebalance(c, old); + + if (mod) { + int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0); + if (ret) + return ret; + } + return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags); } diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 0e3981f42526..a8b148ec2a2b 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -697,18 +697,26 @@ err: return ret; } -void bch2_opt_compression_to_text(struct printbuf *out, - struct bch_fs *c, - struct bch_sb *sb, - u64 v) +void bch2_compression_opt_to_text(struct printbuf *out, u64 v) { struct bch_compression_opt opt = bch2_compression_decode(v); - prt_str(out, bch2_compression_opts[opt.type]); + if (opt.type < BCH_COMPRESSION_OPT_NR) + prt_str(out, bch2_compression_opts[opt.type]); + else + prt_printf(out, "(unknown compression opt %u)", opt.type); if (opt.level) prt_printf(out, ":%u", opt.level); } +void bch2_opt_compression_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_sb *sb, + u64 v) +{ + return bch2_compression_opt_to_text(out, v); +} + int bch2_opt_compression_validate(u64 v, struct printbuf *err) { if (!bch2_compression_opt_valid(v)) { diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index b938fc936365..607fd5e232c9 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -58,6 +58,8 @@ int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned); void bch2_fs_compress_exit(struct bch_fs *); int bch2_fs_compress_init(struct bch_fs *); +void 
bch2_compression_opt_to_text(struct printbuf *, u64); + int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); int bch2_opt_compression_validate(u64, struct printbuf *); diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 4860f8293a4f..d116f2f03db2 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -13,6 +13,7 @@ #include "keylist.h" #include "move.h" #include "nocow_locking.h" +#include "rebalance.h" #include "subvolume.h" #include "trace.h" @@ -251,11 +252,11 @@ restart_drop_extra_replicas: ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, - k.k->p, insert->k.p); - if (ret) - goto err; - - ret = bch2_trans_update(trans, &iter, insert, + k.k->p, insert->k.p) ?: + bch2_bkey_set_needs_rebalance(c, insert, + op->opts.background_target, + op->opts.background_compression) ?: + bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, &op->res, NULL, diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index ccb62fa22b04..0c60d49c3599 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -13,6 +13,7 @@ #include "btree_iter.h" #include "buckets.h" #include "checksum.h" +#include "compress.h" #include "debug.h" #include "disk_groups.h" #include "error.h" @@ -757,18 +758,6 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, return i; } -static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry) -{ - union bch_extent_entry *next = extent_entry_next(entry); - - /* stripes have ptrs, but their layout doesn't work with this code */ - BUG_ON(k.k->type == KEY_TYPE_stripe); - - memmove_u64s_down(entry, next, - (u64 *) bkey_val_end(k) - (u64 *) next); - k.k->u64s -= (u64 *) next - (u64 *) entry; -} - /* * Returns 
pointer to the next entry after the one being dropped: */ @@ -1048,6 +1037,18 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, (u64) ec->idx, ec->block); break; } + case BCH_EXTENT_ENTRY_rebalance: { + const struct bch_extent_rebalance *r = &entry->rebalance; + + prt_str(out, "rebalance: target "); + if (c) + bch2_target_to_text(out, c, r->target); + else + prt_printf(out, "%u", r->target); + prt_str(out, " compression "); + bch2_compression_opt_to_text(out, r->compression); + break; + } default: prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); return; @@ -1223,9 +1224,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, } have_ec = true; break; - case BCH_EXTENT_ENTRY_rebalance: + case BCH_EXTENT_ENTRY_rebalance: { + const struct bch_extent_rebalance *r = &entry->rebalance; + + if (!bch2_compression_opt_valid(r->compression)) { + struct bch_compression_opt opt = __bch2_compression_decode(r->compression); + prt_printf(err, "invalid compression opt %u:%u", + opt.type, opt.level); + return -BCH_ERR_invalid_bkey; + } break; } + } } if (!nr_ptrs) { @@ -1289,6 +1299,125 @@ void bch2_ptr_swab(struct bkey_s k) } } +const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + + bkey_extent_entry_for_each(ptrs, entry) + if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance) + return &entry->rebalance; + + return NULL; +} + +unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k, + unsigned target, unsigned compression) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + unsigned rewrite_ptrs = 0; + + if (compression) { + unsigned compression_type = bch2_compression_opt_to_type(compression); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned i = 0; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.crc.compression_type == 
BCH_COMPRESSION_TYPE_incompressible) { + rewrite_ptrs = 0; + goto incompressible; + } + + if (!p.ptr.cached && p.crc.compression_type != compression_type) + rewrite_ptrs |= 1U << i; + i++; + } + } +incompressible: + if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { + const struct bch_extent_ptr *ptr; + unsigned i = 0; + + bkey_for_each_ptr(ptrs, ptr) { + if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target)) + rewrite_ptrs |= 1U << i; + i++; + } + } + + return rewrite_ptrs; +} + +bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) +{ + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); + + /* + * If it's an indirect extent, we don't delete the rebalance entry when + * done so that we know what options were applied - check if it still + * needs work done: + */ + if (r && + k.k->type == KEY_TYPE_reflink_v && + !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression)) + r = NULL; + + return r != NULL; +} + +int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, + unsigned target, unsigned compression) +{ + struct bkey_s k = bkey_i_to_s(_k); + struct bch_extent_rebalance *r; + bool needs_rebalance; + + if (!bkey_extent_is_direct_data(k.k)) + return 0; + + /* get existing rebalance entry: */ + r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c); + if (r) { + if (k.k->type == KEY_TYPE_reflink_v) { + /* + * indirect extents: existing options take precedence, + * so that we don't move extents back and forth if + * they're referenced by different inodes with different + * options: + */ + if (r->target) + target = r->target; + if (r->compression) + compression = r->compression; + } + + r->target = target; + r->compression = compression; + } + + needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression); + + if (needs_rebalance && !r) { + union bch_extent_entry *new = bkey_val_end(k); + + new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance; + 
new->rebalance.compression = compression; + new->rebalance.target = target; + new->rebalance.unused = 0; + k.k->u64s += extent_entry_u64s(new); + } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) { + /* + * For indirect extents, don't delete the rebalance entry when + * we're finished so that we know we specifically moved it or + * compressed it to its current location/compression type + */ + extent_entry_drop(k, (union bch_extent_entry *) r); + } + + return 0; +} + /* Generic extent code: */ int bch2_cut_front_s(struct bpos where, struct bkey_s k) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index ef1b9f18719d..9110acae7e3c 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -89,6 +89,18 @@ static inline void __extent_entry_insert(struct bkey_i *k, memcpy_u64s_small(dst, new, extent_entry_u64s(new)); } +static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry) +{ + union bch_extent_entry *next = extent_entry_next(entry); + + /* stripes have ptrs, but their layout doesn't work with this code */ + BUG_ON(k.k->type == KEY_TYPE_stripe); + + memmove_u64s_down(entry, next, + (u64 *) bkey_val_end(k) - (u64 *) next); + k.k->u64s -= (u64 *) next - (u64 *) entry; +} + static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) { return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr; @@ -698,6 +710,14 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c, void bch2_ptr_swab(struct bkey_s); +const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); +unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, + unsigned, unsigned); +bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); + +int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, + unsigned, unsigned); + /* Generic extent code: */ enum bch_extent_overlap { diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 119834cb8f9e..0979d5e05713 100644 --- 
a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -16,6 +16,7 @@ #include "io_misc.h" #include "io_write.h" #include "logged_ops.h" +#include "rebalance.h" #include "subvolume.h" /* Overwrites whatever was present with zeroes: */ @@ -355,6 +356,7 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct btree_iter iter; struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; + struct bch_io_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; @@ -363,6 +365,10 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, bool insert = shift > 0; int ret = 0; + ret = bch2_inum_opts_get(trans, inum, &opts); + if (ret) + return ret; + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_INTENT); @@ -443,7 +449,10 @@ case LOGGED_OP_FINSERT_shift_extents: op->v.pos = cpu_to_le64(insert ? 
bkey_start_offset(&delete.k) : delete.k.p.offset); - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: + ret = bch2_bkey_set_needs_rebalance(c, copy, + opts.background_target, + opts.background_compression) ?: + bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BTREE_INSERT_NOFAIL); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index f7461f60d760..6d9c777213e3 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -351,10 +351,13 @@ static int bch2_write_index_default(struct bch_write_op *op) bkey_start_pos(&sk.k->k), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - ret = bch2_extent_update(trans, inum, &iter, sk.k, - &op->res, - op->new_i_size, &op->i_sectors_delta, - op->flags & BCH_WRITE_CHECK_ENOSPC); + ret = bch2_bkey_set_needs_rebalance(c, sk.k, + op->opts.background_target, + op->opts.background_compression) ?: + bch2_extent_update(trans, inum, &iter, sk.k, + &op->res, + op->new_i_size, &op->i_sectors_delta, + op->flags & BCH_WRITE_CHECK_ENOSPC); bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -495,7 +498,6 @@ static void __bch2_write_index(struct bch_write_op *op) { struct bch_fs *c = op->c; struct keylist *keys = &op->insert_keys; - struct bkey_i *k; unsigned dev; int ret = 0; @@ -505,14 +507,6 @@ static void __bch2_write_index(struct bch_write_op *op) goto err; } - /* - * probably not the ideal place to hook this in, but I don't - * particularly want to plumb io_opts all the way through the btree - * update stack right now - */ - for_each_keylist_key(keys, k) - bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts); - if (!bch2_keylist_empty(keys)) { u64 sectors_start = keylist_sectors(keys); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 92403fa79f1f..6ee4d2e02073 100644 --- 
a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -3,13 +3,18 @@ #include "bcachefs.h" #include "alloc_foreground.h" #include "btree_iter.h" +#include "btree_update.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "clock.h" #include "compress.h" #include "disk_groups.h" #include "errcode.h" +#include "error.h" +#include "inode.h" #include "move.h" #include "rebalance.h" +#include "subvolume.h" #include "super-io.h" #include "trace.h" @@ -17,301 +22,399 @@ #include #include -/* - * Check if an extent should be moved: - * returns -1 if it should not be moved, or - * device of pointer that should be moved, if known, or INT_MAX if unknown - */ -static bool rebalance_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - unsigned i; +#define REBALANCE_WORK_SCAN_OFFSET (U64_MAX - 1) - data_opts->rewrite_ptrs = 0; - data_opts->target = io_opts->background_target; - data_opts->extra_replicas = 0; - data_opts->btree_insert_flags = 0; - - if (io_opts->background_compression && - !bch2_bkey_is_incompressible(k)) { - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - - i = 0; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - if (!p.ptr.cached && - p.crc.compression_type != - bch2_compression_opt_to_type(io_opts->background_compression)) - data_opts->rewrite_ptrs |= 1U << i; - i++; - } - } - - if (io_opts->background_target) { - const struct bch_extent_ptr *ptr; +static const char * const bch2_rebalance_state_strs[] = { +#define x(t) #t, + BCH_REBALANCE_STATES() + NULL +#undef x +}; - i = 0; - bkey_for_each_ptr(ptrs, ptr) { - if (!ptr->cached && - !bch2_dev_in_target(c, ptr->dev, io_opts->background_target) && - bch2_target_accepts_data(c, BCH_DATA_user, io_opts->background_target)) - data_opts->rewrite_ptrs |= 1U << i; - i++; - } - } +static int __bch2_set_rebalance_needs_scan(struct btree_trans *trans, u64 
inum) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i_cookie *cookie; + u64 v; + int ret; - return data_opts->rewrite_ptrs != 0; + bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, + SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + v = k.k->type == KEY_TYPE_cookie + ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) + : 0; + + cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); + ret = PTR_ERR_OR_ZERO(cookie); + if (ret) + goto err; + + bkey_cookie_init(&cookie->k_i); + cookie->k.p = iter.pos; + cookie->v.cookie = cpu_to_le64(v + 1); + + ret = bch2_trans_update(trans, &iter, &cookie->k_i, 0); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; } -void bch2_rebalance_add_key(struct bch_fs *c, - struct bkey_s_c k, - struct bch_io_opts *io_opts) +int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) { - struct data_update_opts update_opts = { 0 }; - struct bkey_ptrs_c ptrs; - const struct bch_extent_ptr *ptr; - unsigned i; + int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, + __bch2_set_rebalance_needs_scan(trans, inum)); + rebalance_wakeup(c); + return ret; +} - if (!rebalance_pred(c, NULL, k, io_opts, &update_opts)) - return; - - i = 0; - ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr(ptrs, ptr) { - if ((1U << i) && update_opts.rewrite_ptrs) - if (atomic64_add_return(k.k->size, - &bch_dev_bkey_exists(c, ptr->dev)->rebalance_work) == - k.k->size) - rebalance_wakeup(c); - i++; - } +int bch2_set_fs_needs_rebalance(struct bch_fs *c) +{ + return bch2_set_rebalance_needs_scan(c, 0); } -void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) +static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie) { - if (atomic64_add_return(sectors, &c->rebalance.work_unknown_dev) == - sectors) - rebalance_wakeup(c); + struct btree_iter iter; + struct bkey_s_c k; + u64 v; 
+ int ret; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, + SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + v = k.k->type == KEY_TYPE_cookie + ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) + : 0; + + if (v == cookie) + ret = bch2_btree_delete_at(trans, &iter, 0); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; } -struct rebalance_work { - int dev_most_full_idx; - unsigned dev_most_full_percent; - u64 dev_most_full_work; - u64 dev_most_full_capacity; - u64 total_work; -}; +static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans, + struct btree_iter *work_iter) +{ + return !kthread_should_stop() + ? bch2_btree_iter_peek(work_iter) + : bkey_s_c_null; +} -static void rebalance_work_accumulate(struct rebalance_work *w, - u64 dev_work, u64 unknown_dev, u64 capacity, int idx) +static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) { - unsigned percent_full; - u64 work = dev_work + unknown_dev; + struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); + int ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; - /* avoid divide by 0 */ - if (!capacity) - return; + extent_entry_drop(bkey_i_to_s(n), + (void *) bch2_bkey_rebalance_opts(bkey_i_to_s_c(n))); + return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); +} + +static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, + struct bpos work_pos, + struct btree_iter *extent_iter, + struct data_update_opts *data_opts) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c k; + + bch2_trans_iter_exit(trans, extent_iter); + bch2_trans_iter_init(trans, extent_iter, + work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink, + work_pos, + BTREE_ITER_ALL_SNAPSHOTS); + k = bch2_btree_iter_peek_slot(extent_iter); + if (bkey_err(k)) + return k; + + const struct bch_extent_rebalance *r = k.k ? 
bch2_bkey_rebalance_opts(k) : NULL; + if (!r) { + /* raced due to btree write buffer, nothing to do */ + return bkey_s_c_null; + } - if (work < dev_work || work < unknown_dev) - work = U64_MAX; - work = min(work, capacity); + memset(data_opts, 0, sizeof(*data_opts)); - percent_full = div64_u64(work * 100, capacity); + data_opts->rewrite_ptrs = + bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression); + data_opts->target = r->target; - if (percent_full >= w->dev_most_full_percent) { - w->dev_most_full_idx = idx; - w->dev_most_full_percent = percent_full; - w->dev_most_full_work = work; - w->dev_most_full_capacity = capacity; + if (!data_opts->rewrite_ptrs) { + /* + * device we would want to write to offline? devices in target + * changed? + * + * We'll now need a full scan before this extent is picked up + * again: + */ + int ret = bch2_bkey_clear_needs_rebalance(trans, extent_iter, k); + if (ret) + return bkey_s_c_err(ret); + return bkey_s_c_null; } - if (w->total_work + dev_work >= w->total_work && - w->total_work + dev_work >= dev_work) - w->total_work += dev_work; + return k; } -static struct rebalance_work rebalance_work(struct bch_fs *c) +noinline_for_stack +static int do_rebalance_extent(struct moving_context *ctxt, + struct bpos work_pos, + struct btree_iter *extent_iter) { - struct bch_dev *ca; - struct rebalance_work ret = { .dev_most_full_idx = -1 }; - u64 unknown_dev = atomic64_read(&c->rebalance.work_unknown_dev); - unsigned i; + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; + struct bch_fs_rebalance *r = &trans->c->rebalance; + struct data_update_opts data_opts; + struct bch_io_opts io_opts; + struct bkey_s_c k; + struct bkey_buf sk; + int ret; + + ctxt->stats = &r->work_stats; + r->state = BCH_REBALANCE_working; - for_each_online_member(ca, c, i) - rebalance_work_accumulate(&ret, - atomic64_read(&ca->rebalance_work), - unknown_dev, - bucket_to_sector(ca, ca->mi.nbuckets - - ca->mi.first_bucket), - i); + 
bch2_bkey_buf_init(&sk); - rebalance_work_accumulate(&ret, - unknown_dev, 0, c->capacity, -1); + ret = bkey_err(k = next_rebalance_extent(trans, work_pos, + extent_iter, &data_opts)); + if (ret || !k.k) + goto out; + ret = bch2_move_get_io_opts_one(trans, &io_opts, k); + if (ret) + goto out; + + atomic64_add(k.k->size, &ctxt->stats->sectors_seen); + + /* + * The iterator gets unlocked by __bch2_read_extent - need to + * save a copy of @k elsewhere: + */ + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); + + ret = bch2_move_extent(ctxt, NULL, extent_iter, k, io_opts, data_opts); + if (ret) { + if (bch2_err_matches(ret, ENOMEM)) { + /* memory allocation failure, wait for some IO to finish */ + bch2_move_ctxt_wait_for_io(ctxt); + ret = -BCH_ERR_transaction_restart_nested; + } + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto out; + + /* skip it and continue, XXX signal failure */ + ret = 0; + } +out: + bch2_bkey_buf_exit(&sk, c); return ret; } -static void rebalance_work_reset(struct bch_fs *c) +static bool rebalance_pred(struct bch_fs *c, void *arg, + struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) { - struct bch_dev *ca; - unsigned i; + unsigned target, compression; - for_each_online_member(ca, c, i) - atomic64_set(&ca->rebalance_work, 0); + if (k.k->p.inode) { + target = io_opts->background_target; + compression = io_opts->background_compression ?: io_opts->compression; + } else { + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); + + target = r ? r->target : io_opts->background_target; + compression = r ? 
r->compression : + (io_opts->background_compression ?: io_opts->compression); + } - atomic64_set(&c->rebalance.work_unknown_dev, 0); + data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); + data_opts->target = target; + return data_opts->rewrite_ptrs != 0; } -static unsigned long curr_cputime(void) +static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie) { - u64 utime, stime; + struct btree_trans *trans = ctxt->trans; + struct bch_fs_rebalance *r = &trans->c->rebalance; + int ret; + + bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); + ctxt->stats = &r->scan_stats; - task_cputime_adjusted(current, &utime, &stime); - return nsecs_to_jiffies(utime + stime); + if (!inum) { + r->scan_start = BBPOS_MIN; + r->scan_end = BBPOS_MAX; + } else { + r->scan_start = BBPOS(BTREE_ID_extents, POS(inum, 0)); + r->scan_end = BBPOS(BTREE_ID_extents, POS(inum, U64_MAX)); + } + + r->state = BCH_REBALANCE_scanning; + + ret = __bch2_move_data(ctxt, r->scan_start, r->scan_end, rebalance_pred, NULL) ?: + commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_clear_rebalance_needs_scan(trans, inum, cookie)); + + bch2_move_stats_exit(&r->scan_stats, trans->c); + return ret; } -static int bch2_rebalance_thread(void *arg) +static void rebalance_wait(struct bch_fs *c) { - struct bch_fs *c = arg; struct bch_fs_rebalance *r = &c->rebalance; + struct bch_dev *ca; struct io_clock *clock = &c->io_clock[WRITE]; - struct rebalance_work w, p; - struct bch_move_stats move_stats; - unsigned long start, prev_start; - unsigned long prev_run_time, prev_run_cputime; - unsigned long cputime, prev_cputime; - u64 io_start; - long throttle; + u64 now = atomic64_read(&clock->now); + u64 min_member_capacity = 128 * 2048; + unsigned i; - set_freezable(); + for_each_rw_member(ca, c, i) + min_member_capacity = min(min_member_capacity, + ca->mi.nbuckets * ca->mi.bucket_size); + + r->wait_iotime_end = now + (min_member_capacity >> 6); + + if (r->state != 
BCH_REBALANCE_waiting) { + r->wait_iotime_start = now; + r->wait_wallclock_start = ktime_get_real_ns(); + r->state = BCH_REBALANCE_waiting; + } - io_start = atomic64_read(&clock->now); - p = rebalance_work(c); - prev_start = jiffies; - prev_cputime = curr_cputime(); + bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT); +} - bch2_move_stats_init(&move_stats, "rebalance"); - while (!kthread_wait_freezable(r->enabled)) { - cond_resched(); +static int do_rebalance(struct moving_context *ctxt) +{ + struct btree_trans *trans = ctxt->trans; + struct bch_fs *c = trans->c; + struct bch_fs_rebalance *r = &c->rebalance; + struct btree_iter rebalance_work_iter, extent_iter = { NULL }; + struct bkey_s_c k; + int ret = 0; - start = jiffies; - cputime = curr_cputime(); + bch2_move_stats_init(&r->work_stats, "rebalance_work"); + bch2_move_stats_init(&r->scan_stats, "rebalance_scan"); - prev_run_time = start - prev_start; - prev_run_cputime = cputime - prev_cputime; + bch2_trans_iter_init(trans, &rebalance_work_iter, + BTREE_ID_rebalance_work, POS_MIN, + BTREE_ITER_ALL_SNAPSHOTS); - w = rebalance_work(c); - BUG_ON(!w.dev_most_full_capacity); + while (!bch2_move_ratelimit(ctxt) && + !kthread_wait_freezable(r->enabled)) { + bch2_trans_begin(trans); - if (!w.total_work) { - r->state = REBALANCE_WAITING; - kthread_wait_freezable(rebalance_work(c).total_work); + ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter)); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; - } + if (ret || !k.k) + break; - /* - * If there isn't much work to do, throttle cpu usage: - */ - throttle = prev_run_cputime * 100 / - max(1U, w.dev_most_full_percent) - - prev_run_time; - - if (w.dev_most_full_percent < 20 && throttle > 0) { - r->throttled_until_iotime = io_start + - div_u64(w.dev_most_full_capacity * - (20 - w.dev_most_full_percent), - 50); - - if (atomic64_read(&clock->now) + clock->max_slop < - r->throttled_until_iotime) { - 
r->throttled_until_cputime = start + throttle; - r->state = REBALANCE_THROTTLED; - - bch2_kthread_io_clock_wait(clock, - r->throttled_until_iotime, - throttle); - continue; - } - } + ret = k.k->type == KEY_TYPE_cookie + ? do_rebalance_scan(ctxt, k.k->p.inode, + le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie)) + : do_rebalance_extent(ctxt, k.k->p, &extent_iter); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; - /* minimum 1 mb/sec: */ - r->pd.rate.rate = - max_t(u64, 1 << 11, - r->pd.rate.rate * - max(p.dev_most_full_percent, 1U) / - max(w.dev_most_full_percent, 1U)); - - io_start = atomic64_read(&clock->now); - p = w; - prev_start = start; - prev_cputime = cputime; - - r->state = REBALANCE_RUNNING; - memset(&move_stats, 0, sizeof(move_stats)); - rebalance_work_reset(c); - - bch2_move_data(c, - BBPOS_MIN, BBPOS_MAX, - /* ratelimiting disabled for now */ - NULL, /* &r->pd.rate, */ - &move_stats, - writepoint_ptr(&c->rebalance_write_point), - true, - rebalance_pred, NULL); + bch2_btree_iter_advance(&rebalance_work_iter); } - return 0; + bch2_trans_iter_exit(trans, &extent_iter); + bch2_trans_iter_exit(trans, &rebalance_work_iter); + bch2_move_stats_exit(&r->scan_stats, c); + + if (!ret && + !kthread_should_stop() && + !atomic64_read(&r->work_stats.sectors_seen) && + !atomic64_read(&r->scan_stats.sectors_seen)) { + bch2_trans_unlock(trans); + rebalance_wait(c); + } + + if (!bch2_err_matches(ret, EROFS)) + bch_err_fn(c, ret); + return ret; } -void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) +static int bch2_rebalance_thread(void *arg) { + struct bch_fs *c = arg; struct bch_fs_rebalance *r = &c->rebalance; - struct rebalance_work w = rebalance_work(c); + struct moving_context ctxt; + int ret; - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 20); + set_freezable(); - prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx); - prt_tab(out); + bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, + 
writepoint_ptr(&c->rebalance_write_point), + true); - prt_human_readable_u64(out, w.dev_most_full_work << 9); - prt_printf(out, "/"); - prt_human_readable_u64(out, w.dev_most_full_capacity << 9); - prt_newline(out); + while (!kthread_should_stop() && + !(ret = do_rebalance(&ctxt))) + ; - prt_printf(out, "total work:"); - prt_tab(out); + bch2_moving_ctxt_exit(&ctxt); - prt_human_readable_u64(out, w.total_work << 9); - prt_printf(out, "/"); - prt_human_readable_u64(out, c->capacity << 9); - prt_newline(out); + return 0; +} + +void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) +{ + struct bch_fs_rebalance *r = &c->rebalance; - prt_printf(out, "rate:"); - prt_tab(out); - prt_printf(out, "%u", r->pd.rate.rate); + prt_str(out, bch2_rebalance_state_strs[r->state]); prt_newline(out); + printbuf_indent_add(out, 2); switch (r->state) { - case REBALANCE_WAITING: - prt_printf(out, "waiting"); + case BCH_REBALANCE_waiting: { + u64 now = atomic64_read(&c->io_clock[WRITE].now); + + prt_str(out, "io wait duration: "); + bch2_prt_human_readable_s64(out, r->wait_iotime_end - r->wait_iotime_start); + prt_newline(out); + + prt_str(out, "io wait remaining: "); + bch2_prt_human_readable_s64(out, r->wait_iotime_end - now); + prt_newline(out); + + prt_str(out, "duration waited: "); + bch2_pr_time_units(out, ktime_get_real_ns() - r->wait_wallclock_start); + prt_newline(out); break; - case REBALANCE_THROTTLED: - prt_printf(out, "throttled for %lu sec or ", - (r->throttled_until_cputime - jiffies) / HZ); - prt_human_readable_u64(out, - (r->throttled_until_iotime - - atomic64_read(&c->io_clock[WRITE].now)) << 9); - prt_printf(out, " io"); + } + case BCH_REBALANCE_working: + bch2_move_stats_to_text(out, &r->work_stats); break; - case REBALANCE_RUNNING: - prt_printf(out, "running"); + case BCH_REBALANCE_scanning: + bch2_move_stats_to_text(out, &r->scan_stats); break; } prt_newline(out); + printbuf_indent_sub(out, 2); } void bch2_rebalance_stop(struct bch_fs *c) @@ -360,6 
+463,4 @@ int bch2_rebalance_start(struct bch_fs *c) void bch2_fs_rebalance_init(struct bch_fs *c) { bch2_pd_controller_init(&c->rebalance.pd); - - atomic64_set(&c->rebalance.work_unknown_dev, S64_MAX); } diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 7ade0bb81cce..28a52638f16c 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -4,6 +4,9 @@ #include "rebalance_types.h" +int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); +int bch2_set_fs_needs_rebalance(struct bch_fs *); + static inline void rebalance_wakeup(struct bch_fs *c) { struct task_struct *p; @@ -15,11 +18,7 @@ static inline void rebalance_wakeup(struct bch_fs *c) rcu_read_unlock(); } -void bch2_rebalance_add_key(struct bch_fs *, struct bkey_s_c, - struct bch_io_opts *); -void bch2_rebalance_add_work(struct bch_fs *, u64); - -void bch2_rebalance_work_to_text(struct printbuf *, struct bch_fs *); +void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *); void bch2_rebalance_stop(struct bch_fs *); int bch2_rebalance_start(struct bch_fs *); diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h index 7462a92e9598..0fffb536c1d0 100644 --- a/fs/bcachefs/rebalance_types.h +++ b/fs/bcachefs/rebalance_types.h @@ -2,25 +2,36 @@ #ifndef _BCACHEFS_REBALANCE_TYPES_H #define _BCACHEFS_REBALANCE_TYPES_H +#include "bbpos_types.h" #include "move_types.h" -enum rebalance_state { - REBALANCE_WAITING, - REBALANCE_THROTTLED, - REBALANCE_RUNNING, +#define BCH_REBALANCE_STATES() \ + x(waiting) \ + x(working) \ + x(scanning) + +enum bch_rebalance_states { +#define x(t) BCH_REBALANCE_##t, + BCH_REBALANCE_STATES() +#undef x }; struct bch_fs_rebalance { - struct task_struct __rcu *thread; + struct task_struct __rcu *thread; struct bch_pd_controller pd; - atomic64_t work_unknown_dev; + enum bch_rebalance_states state; + u64 wait_iotime_start; + u64 wait_iotime_end; + u64 wait_wallclock_start; + + struct bch_move_stats work_stats; - enum 
rebalance_state state; - u64 throttled_until_iotime; - unsigned long throttled_until_cputime; + struct bbpos scan_start; + struct bbpos scan_end; + struct bch_move_stats scan_stats; - unsigned enabled:1; + unsigned enabled:1; }; #endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 63faf70434ff..02025099c38f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -23,6 +23,7 @@ #include "logged_ops.h" #include "move.h" #include "quota.h" +#include "rebalance.h" #include "recovery.h" #include "replicas.h" #include "sb-clean.h" diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index e2d8771909ef..515e3d62c2ac 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -42,6 +42,7 @@ x(check_nlinks, PASS_FSCK) \ x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \ x(fix_reflink_p, 0) \ + x(set_fs_needs_rebalance, 0) \ enum bch_recovery_pass { #define x(n, when) BCH_RECOVERY_PASS_##n, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 540c78cd4b0c..507100b38e29 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -7,6 +7,7 @@ #include "inode.h" #include "io_misc.h" #include "io_write.h" +#include "rebalance.h" #include "reflink.h" #include "subvolume.h" #include "super-io.h" @@ -252,8 +253,9 @@ s64 bch2_remap_range(struct bch_fs *c, struct bpos dst_start = POS(dst_inum.inum, dst_offset); struct bpos src_start = POS(src_inum.inum, src_offset); struct bpos dst_end = dst_start, src_end = src_start; + struct bch_io_opts opts; struct bpos src_want; - u64 dst_done; + u64 dst_done = 0; u32 dst_snapshot, src_snapshot; int ret = 0, ret2 = 0; @@ -269,6 +271,10 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_src); trans = bch2_trans_get(c); + ret = bch2_inum_opts_get(trans, src_inum, &opts); + if (ret) + goto err; + bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start, BTREE_ITER_INTENT); bch2_trans_iter_init(trans, 
&dst_iter, BTREE_ID_extents, dst_start, @@ -352,10 +358,13 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); - ret = bch2_extent_update(trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, - new_i_size, i_sectors_delta, - true); + ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, + opts.background_target, + opts.background_compression) ?: + bch2_extent_update(trans, dst_inum, &dst_iter, + new_dst.k, &disk_res, + new_i_size, i_sectors_delta, + true); bch2_disk_reservation_put(c, &disk_res); } bch2_trans_iter_exit(trans, &dst_iter); @@ -386,7 +395,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(trans, &inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); - +err: bch2_trans_put(trans); bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 3ac6634020d1..7975587cab9a 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -212,7 +212,7 @@ read_attribute(copy_gc_wait); rw_attribute(rebalance_enabled); sysfs_pd_controller_attribute(rebalance); -read_attribute(rebalance_work); +read_attribute(rebalance_status); rw_attribute(promote_whole_extents); read_attribute(new_stripes); @@ -386,8 +386,8 @@ SHOW(bch2_fs) if (attr == &sysfs_copy_gc_wait) bch2_copygc_wait_to_text(out, c); - if (attr == &sysfs_rebalance_work) - bch2_rebalance_work_to_text(out, c); + if (attr == &sysfs_rebalance_status) + bch2_rebalance_status_to_text(out, c); sysfs_print(promote_whole_extents, c->promote_whole_extents); @@ -646,7 +646,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_copy_gc_wait, &sysfs_rebalance_enabled, - &sysfs_rebalance_work, + &sysfs_rebalance_status, sysfs_pd_controller_files(rebalance), &sysfs_moving_ctxts, @@ -707,10 +707,8 @@ STORE(bch2_fs_opts_dir) bch2_opt_set_by_id(&c->opts, id, v); if ((id == Opt_background_target || - id == Opt_background_compression) && v) { - 
bch2_rebalance_add_work(c, S64_MAX); - rebalance_wakeup(c); - } + id == Opt_background_compression) && v) + bch2_set_rebalance_needs_scan(c, 0); ret = size; err: diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index b069b1a62e25..74b41f567ab8 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -590,7 +590,7 @@ err: if (value && (opt_id == Opt_background_compression || opt_id == Opt_background_target)) - bch2_rebalance_add_work(c, inode->v.i_blocks); + bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum); return bch2_err_class(ret); } -- cgit v1.2.3 From 5c1ab40e76dd873bfbfbe4df98ca3e08de31d30d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Oct 2023 14:56:53 -0400 Subject: bcachefs: Fix kasan splat in members_v1_get() This fixes an incorrect memcpy() in the recent members_v2 code - a members_v1 member is BCH_MEMBER_V1_BYTES, not sizeof(struct bch_member). Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 6dd85bb996fe..168b032a82d3 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -36,7 +36,8 @@ static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int { struct bch_member ret, *p = members_v1_get_mut(mi, i); memset(&ret, 0, sizeof(ret)); - memcpy(&ret, p, min_t(size_t, sizeof(struct bch_member), sizeof(ret))); return ret; + memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); + return ret; } struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) @@ -262,8 +263,7 @@ static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); unsigned i; - if ((void *) members_v1_get_mut(mi, sb->nr_devices) > - vstruct_end(&mi->field)) { + if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { prt_printf(err, "too many devices for section 
size"); return -BCH_ERR_invalid_sb_members; } -- cgit v1.2.3 From e84843489c15bf9d39eec3a9a95870f98a71ac24 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Oct 2023 17:00:36 -0400 Subject: bcachefs: Fix a kasan splat in bch2_dev_add() This fixes a use after free - mi is dangling after the resize call. Additionally, resizing the device's member info section was useless - we were attempting to preallocate the space required before adding it to the filesystem superblock, but there's other sections that we should have been preallocating as well for that to work. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ce59018b27ac..835342b56003 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1622,16 +1622,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_unlock; } - mi = bch2_sb_field_get(ca->disk_sb.sb, members_v2); - - if (!bch2_sb_field_resize(&ca->disk_sb, members_v2, - le32_to_cpu(mi->field.u64s) + - sizeof(dev_mi) / sizeof(u64))) { - ret = -BCH_ERR_ENOSPC_sb_members; - bch_err_msg(c, ret, "setting up new superblock"); - goto err_unlock; - } - if (dynamic_fault("bcachefs:add:no_slot")) goto no_slot; @@ -1645,6 +1635,8 @@ no_slot: have_slot: nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); + + mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) + le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64)); -- cgit v1.2.3 From 5394fe9494011de19baff276ce02a2f00eef568a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Oct 2023 16:20:08 -0400 Subject: bcachefs: Fix snapshot skiplists Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 
315e88cc3867..e70adfcbd953 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -255,8 +255,7 @@ int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - if ((id && !s.v->parent) || - (id && id <= k.k->p.offset)) { + if (id && id < le32_to_cpu(s.v->parent)) { prt_printf(err, "bad skiplist node %u", id); return -BCH_ERR_invalid_bkey; } @@ -1348,12 +1347,12 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, u32 id = le32_to_cpu(s->v.skip[j]); if (snapshot_list_has_id(deleted, id)) { - id = depth > 1 - ? bch2_snapshot_nth_parent_skip(c, + id = bch2_snapshot_nth_parent_skip(c, parent, - get_random_u32_below(depth - 1), - deleted) - : parent; + depth > 1 + ? get_random_u32_below(depth - 1) + : 0, + deleted); s->v.skip[j] = cpu_to_le32(id); } } -- cgit v1.2.3 From 94119eeb02d114aa1f78dcfaabdca50b9b626790 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 25 Oct 2023 16:29:37 -0400 Subject: bcachefs: Add IO error counts to bch_member We now track IO errors per device since filesystem creation. IO error counts can be viewed in sysfs, or with the 'bcachefs show-super' command. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 + fs/bcachefs/bcachefs_format.h | 15 ++++++ fs/bcachefs/btree_io.c | 23 +++++--- fs/bcachefs/ec.c | 6 ++- fs/bcachefs/error.c | 3 +- fs/bcachefs/error.h | 10 ++-- fs/bcachefs/io_read.c | 4 +- fs/bcachefs/io_write.c | 2 +- fs/bcachefs/journal_io.c | 8 +-- fs/bcachefs/opts.c | 5 -- fs/bcachefs/opts.h | 1 - fs/bcachefs/sb-members.c | 121 ++++++++++++++++++++++++++++++++++++------ fs/bcachefs/sb-members.h | 42 ++++++++++++++- fs/bcachefs/super-io.c | 9 +++- fs/bcachefs/super-io.h | 35 ------------ fs/bcachefs/super.c | 5 ++ fs/bcachefs/sysfs.c | 20 +++++-- 17 files changed, 225 insertions(+), 86 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 68f0ff03c28a..0ae14a69dfde 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -502,6 +502,8 @@ struct bch_dev { * Committed by bch2_write_super() -> bch_fs_mi_update() */ struct bch_member_cpu mi; + atomic64_t errors[BCH_MEMBER_ERROR_NR]; + __uuid_t uuid; char name[BDEVNAME_SIZE]; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index e04999c57892..dbde425b4e76 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1268,6 +1268,18 @@ enum bch_iops_measurement { BCH_IOPS_NR }; +#define BCH_MEMBER_ERROR_TYPES() \ + x(read, 0) \ + x(write, 1) \ + x(checksum, 2) + +enum bch_member_error_type { +#define x(t, n) BCH_MEMBER_ERROR_##t = n, + BCH_MEMBER_ERROR_TYPES() +#undef x + BCH_MEMBER_ERROR_NR +}; + struct bch_member { __uuid_t uuid; __le64 nbuckets; /* device size */ @@ -1278,6 +1290,9 @@ struct bch_member { __le64 flags; __le32 iops[4]; + __le64 errors[BCH_MEMBER_ERROR_NR]; + __le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; + __le64 errors_reset_time; }; #define BCH_MEMBER_V1_BYTES 56 diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 7bf3ee25bc32..f398c8095f07 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -934,8 +934,8 
@@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, while (b->written < (ptr_written ?: btree_sectors(c))) { unsigned sectors; struct nonce nonce; - struct bch_csum csum; bool first = !b->written; + bool csum_bad; if (!b->written) { i = &b->data->keys; @@ -946,9 +946,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); - csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); - btree_err_on(bch2_crc_cmp(csum, b->data->csum), + csum_bad = bch2_crc_cmp(b->data->csum, + csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data)); + if (csum_bad) + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); + + btree_err_on(csum_bad, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, "invalid checksum"); @@ -976,9 +980,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); - csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); + csum_bad = bch2_crc_cmp(bne->csum, + csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne)); + if (csum_bad) + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); - btree_err_on(bch2_crc_cmp(csum, bne->csum), + btree_err_on(csum_bad, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, "invalid checksum"); @@ -1168,7 +1175,8 @@ static void btree_node_read_work(struct work_struct *work) start: printbuf_reset(&buf); bch2_btree_pos_to_text(&buf, c, b); - bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s", + bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, + "btree read error %s for %s", bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) percpu_ref_put(&ca->io_ref); @@ -1749,7 +1757,8 @@ static void btree_node_write_endio(struct bio *bio) if (wbio->have_ioref) bch2_latency_acct(ca, wbio->submit_time, WRITE); - if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + 
"btree write error: %s", bch2_blk_status_to_str(bio->bi_status)) || bch2_meta_write_fault("btree")) { spin_lock_irqsave(&c->btree_write_error_lock, flags); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 8646856e4539..c9795cd98192 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -373,7 +373,11 @@ static void ec_block_endio(struct bio *bio) struct bch_dev *ca = ec_bio->ca; struct closure *cl = bio->bi_private; - if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, + bio_data_dir(bio) + ? BCH_MEMBER_ERROR_write + : BCH_MEMBER_ERROR_read, + "erasure coding %s error: %s", bio_data_dir(bio) ? "write" : "read", bch2_blk_status_to_str(bio->bi_status))) clear_bit(ec_bio->idx, ec_bio->buf->valid); diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 2a5af8872613..4dbfe31197bc 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -56,8 +56,9 @@ void bch2_io_error_work(struct work_struct *work) up_write(&c->state_lock); } -void bch2_io_error(struct bch_dev *ca) +void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) { + atomic64_inc(&ca->errors[type]); //queue_work(system_long_wq, &ca->io_error_work); } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 7ce9540052e5..958b2bed4f39 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -179,26 +179,26 @@ do { \ void bch2_io_error_work(struct work_struct *); /* Does the error handling without logging a message */ -void bch2_io_error(struct bch_dev *); +void bch2_io_error(struct bch_dev *, enum bch_member_error_type); -#define bch2_dev_io_err_on(cond, ca, ...) \ +#define bch2_dev_io_err_on(cond, ca, _type, ...) \ ({ \ bool _ret = (cond); \ \ if (_ret) { \ bch_err_dev_ratelimited(ca, __VA_ARGS__); \ - bch2_io_error(ca); \ + bch2_io_error(ca, _type); \ } \ _ret; \ }) -#define bch2_dev_inum_io_err_on(cond, ca, ...) \ +#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) 
\ ({ \ bool _ret = (cond); \ \ if (_ret) { \ bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \ - bch2_io_error(ca); \ + bch2_io_error(ca, _type); \ } \ _ret; \ }) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 443c3ea65527..ae36fc485f5f 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -643,7 +643,7 @@ csum_err: "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); - bch2_io_error(ca); + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); goto out; decompression_err: @@ -677,7 +677,7 @@ static void bch2_read_endio(struct bio *bio) if (!rbio->split) rbio->bio.bi_end_io = rbio->end_io; - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, rbio->read_pos.inode, rbio->read_pos.offset, "data read error: %s", diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 6d9c777213e3..613f38436640 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -637,7 +637,7 @@ static void bch2_write_endio(struct bio *bio) struct bch_fs *c = wbio->c; struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, op->pos.inode, wbio->inode_offset << 9, "data write error: %s", diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index b29ece313e44..9807e909cff4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -900,7 +900,7 @@ reread: ret = submit_bio_wait(bio); kfree(bio); - if (bch2_dev_io_err_on(ret, ca, + if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read, "journal read error: sector %llu", offset) || bch2_meta_read_fault("journal")) { @@ -956,7 +956,8 @@ reread: ja->bucket_seq[bucket] = le64_to_cpu(j->seq); csum_good = jset_csum_good(c, j); - 
if (!csum_good) + if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum, + "journal checksum error")) saw_bad = true; ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), @@ -1581,7 +1582,8 @@ static void journal_write_endio(struct bio *bio) struct journal_buf *w = journal_last_unwritten_buf(j); unsigned long flags; - if (bch2_dev_io_err_on(bio->bi_status, ca, "error writing journal entry %llu: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + "error writing journal entry %llu: %s", le64_to_cpu(w->data->seq), bch2_blk_status_to_str(bio->bi_status)) || bch2_meta_write_fault("journal")) { diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index b7722b623697..4ad5880664b0 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -12,11 +12,6 @@ #define x(t, n, ...) [n] = #t, -const char * const bch2_iops_measurements[] = { - BCH_IOPS_MEASUREMENTS() - NULL -}; - const char * const bch2_error_actions[] = { BCH_ERROR_ACTIONS() NULL diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 2307cdd2a23c..8526f177450a 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -10,7 +10,6 @@ struct bch_fs; -extern const char * const bch2_iops_measurements[]; extern const char * const bch2_error_actions[]; extern const char * const bch2_fsck_fix_opts[]; extern const char * const bch2_version_upgrade_opts[]; diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 168b032a82d3..91566accc5a7 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -7,6 +7,18 @@ #include "sb-members.h" #include "super-io.h" +#define x(t, n, ...) 
[n] = #t, +static const char * const bch2_iops_measurements[] = { + BCH_IOPS_MEASUREMENTS() + NULL +}; + +char * const bch2_member_error_strs[] = { + BCH_MEMBER_ERROR_TYPES() + NULL +}; +#undef x + /* Code for bch_sb_field_members_v1: */ static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i) @@ -92,7 +104,7 @@ int bch2_members_v2_init(struct bch_fs *c) return sb_members_v2_resize_entries(c); } -int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) +int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) { struct bch_sb_field_members_v1 *mi1; struct bch_sb_field_members_v2 *mi2; @@ -156,7 +168,6 @@ static void member_to_text(struct printbuf *out, u64 bucket_size = le16_to_cpu(m.bucket_size); u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; - prt_printf(out, "Device:"); prt_tab(out); prt_printf(out, "%u", i); @@ -164,6 +175,21 @@ static void member_to_text(struct printbuf *out, printbuf_indent_add(out, 2); + prt_printf(out, "Label:"); + prt_tab(out); + if (BCH_MEMBER_GROUP(&m)) { + unsigned idx = BCH_MEMBER_GROUP(&m) - 1; + + if (idx < disk_groups_nr(gi)) + prt_printf(out, "%s (%u)", + gi->entries[idx].label, idx); + else + prt_printf(out, "(bad disk labels section)"); + } else { + prt_printf(out, "(none)"); + } + prt_newline(out); + prt_printf(out, "UUID:"); prt_tab(out); pr_uuid(out, m.uuid.b); @@ -174,6 +200,13 @@ static void member_to_text(struct printbuf *out, prt_units_u64(out, device_size << 9); prt_newline(out); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s errors:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, le64_to_cpu(m.errors[i])); + prt_newline(out); + } + for (unsigned i = 0; i < BCH_IOPS_NR; i++) { prt_printf(out, "%s iops:", bch2_iops_measurements[i]); prt_tab(out); @@ -212,21 +245,6 @@ static void member_to_text(struct printbuf *out, : "unknown"); prt_newline(out); - prt_printf(out, "Label:"); - prt_tab(out); - if (BCH_MEMBER_GROUP(&m)) { - unsigned idx = 
BCH_MEMBER_GROUP(&m) - 1; - - if (idx < disk_groups_nr(gi)) - prt_printf(out, "%s (%u)", - gi->entries[idx].label, idx); - else - prt_printf(out, "(bad disk labels section)"); - } else { - prt_printf(out, "(none)"); - } - prt_newline(out); - prt_printf(out, "Data allowed:"); prt_tab(out); if (BCH_MEMBER_DATA_ALLOWED(&m)) @@ -337,3 +355,72 @@ const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { .validate = bch2_sb_members_v2_validate, .to_text = bch2_sb_members_v2_to_text, }; + +void bch2_sb_members_from_cpu(struct bch_fs *c) +{ + struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); + struct bch_dev *ca; + unsigned i, e; + + rcu_read_lock(); + for_each_member_device_rcu(ca, c, i, NULL) { + struct bch_member *m = members_v2_get_mut(mi, i); + + for (e = 0; e < BCH_MEMBER_ERROR_NR; e++) + m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); + } + rcu_read_unlock(); +} + +void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + struct bch_member m; + + mutex_lock(&ca->fs->sb_lock); + m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); + mutex_unlock(&ca->fs->sb_lock); + + printbuf_tabstop_push(out, 12); + + prt_str(out, "IO errors since filesystem creation"); + prt_newline(out); + + printbuf_indent_add(out, 2); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, atomic64_read(&ca->errors[i])); + prt_newline(out); + } + printbuf_indent_sub(out, 2); + + prt_str(out, "IO errors since "); + bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); + prt_str(out, " ago"); + prt_newline(out); + + printbuf_indent_add(out, 2); + for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { + prt_printf(out, "%s:", bch2_member_error_strs[i]); + prt_tab(out); + prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); + prt_newline(out); + } 
+ printbuf_indent_sub(out, 2); +} + +void bch2_dev_errors_reset(struct bch_dev *ca) +{ + struct bch_fs *c = ca->fs; + struct bch_member *m; + + mutex_lock(&c->sb_lock); + m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) + m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); + m->errors_reset_time = ktime_get_real_seconds(); + + bch2_write_super(c); + mutex_unlock(&c->sb_lock); +} diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 430f3457bfd4..7cfd55a43bb5 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -2,8 +2,10 @@ #ifndef _BCACHEFS_SB_MEMBERS_H #define _BCACHEFS_SB_MEMBERS_H +extern char * const bch2_member_error_strs[]; + int bch2_members_v2_init(struct bch_fs *c); -int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); +int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); @@ -179,4 +181,42 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1; extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2; +static inline bool bch2_member_exists(struct bch_member *m) +{ + return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); +} + +static inline bool bch2_dev_exists(struct bch_sb *sb, + unsigned dev) +{ + if (dev < sb->nr_devices) { + struct bch_member m = bch2_sb_member_get(sb, dev); + return bch2_member_exists(&m); + } + return false; +} + +static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) +{ + return (struct bch_member_cpu) { + .nbuckets = le64_to_cpu(mi->nbuckets), + .first_bucket = le16_to_cpu(mi->first_bucket), + .bucket_size = le16_to_cpu(mi->bucket_size), + .group = BCH_MEMBER_GROUP(mi), + .state = BCH_MEMBER_STATE(mi), + .discard = BCH_MEMBER_DISCARD(mi), + .data_allowed = 
BCH_MEMBER_DATA_ALLOWED(mi), + .durability = BCH_MEMBER_DURABILITY(mi) + ? BCH_MEMBER_DURABILITY(mi) - 1 + : 1, + .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), + .valid = bch2_member_exists(mi), + }; +} + +void bch2_sb_members_from_cpu(struct bch_fs *); + +void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); +void bch2_dev_errors_reset(struct bch_dev *); + #endif /* _BCACHEFS_SB_MEMBERS_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 332d41e1c0a3..64e861b87535 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -805,7 +805,12 @@ static void write_super_endio(struct bio *bio) /* XXX: return errors directly */ - if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write error: %s", + if (bch2_dev_io_err_on(bio->bi_status, ca, + bio_data_dir(bio) + ? BCH_MEMBER_ERROR_write + : BCH_MEMBER_ERROR_read, + "superblock %s error: %s", + bio_data_dir(bio) ? "write" : "read", bch2_blk_status_to_str(bio->bi_status))) ca->sb_write_error = 1; @@ -892,7 +897,7 @@ int bch2_write_super(struct bch_fs *c) SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); bch2_sb_counters_from_cpu(c); - bch_members_cpy_v2_v1(&c->disk_sb); + bch2_sb_members_cpy_v2_v1(&c->disk_sb); for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index b0d8584f475f..5d079dd12f95 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -78,41 +78,6 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) __bch2_check_set_feature(c, feat); } -/* BCH_SB_FIELD_members_v1: */ - -static inline bool bch2_member_exists(struct bch_member *m) -{ - return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); -} - -static inline bool bch2_dev_exists(struct bch_sb *sb, - unsigned dev) -{ - if (dev < sb->nr_devices) { - struct bch_member m = bch2_sb_member_get(sb, dev); - return bch2_member_exists(&m); - } - return false; -} - -static inline struct bch_member_cpu 
bch2_mi_to_cpu(struct bch_member *mi) -{ - return (struct bch_member_cpu) { - .nbuckets = le64_to_cpu(mi->nbuckets), - .first_bucket = le16_to_cpu(mi->first_bucket), - .bucket_size = le16_to_cpu(mi->bucket_size), - .group = BCH_MEMBER_GROUP(mi), - .state = BCH_MEMBER_STATE(mi), - .discard = BCH_MEMBER_DISCARD(mi), - .data_allowed = BCH_MEMBER_DATA_ALLOWED(mi), - .durability = BCH_MEMBER_DURABILITY(mi) - ? BCH_MEMBER_DURABILITY(mi) - 1 - : 1, - .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), - .valid = bch2_member_exists(mi), - }; -} - void bch2_sb_maybe_downgrade(struct bch_fs *); void bch2_sb_upgrade(struct bch_fs *, unsigned); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 835342b56003..e16b5bc12d26 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1131,6 +1131,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, struct bch_member *member) { struct bch_dev *ca; + unsigned i; ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) @@ -1148,6 +1149,10 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, bch2_time_stats_init(&ca->io_latency[WRITE]); ca->mi = bch2_mi_to_cpu(member); + + for (i = 0; i < ARRAY_SIZE(member->errors); i++) + atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i])); + ca->uuid = member->uuid; ca->nr_btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 7975587cab9a..662366ce9e00 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -149,7 +149,9 @@ read_attribute(bucket_size); read_attribute(first_bucket); read_attribute(nbuckets); rw_attribute(durability); -read_attribute(iodone); +read_attribute(io_done); +read_attribute(io_errors); +write_attribute(io_errors_reset); read_attribute(io_latency_read); read_attribute(io_latency_write); @@ -880,7 +882,7 @@ static const char * const bch2_rw[] = { NULL }; -static void dev_iodone_to_text(struct printbuf *out, struct bch_dev *ca) +static void dev_io_done_to_text(struct printbuf 
*out, struct bch_dev *ca) { int rw, i; @@ -923,8 +925,11 @@ SHOW(bch2_dev) prt_char(out, '\n'); } - if (attr == &sysfs_iodone) - dev_iodone_to_text(out, ca); + if (attr == &sysfs_io_done) + dev_io_done_to_text(out, ca); + + if (attr == &sysfs_io_errors) + bch2_dev_io_errors_to_text(out, ca); sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); @@ -991,6 +996,9 @@ STORE(bch2_dev) return ret; } + if (attr == &sysfs_io_errors_reset) + bch2_dev_errors_reset(ca); + return size; } SYSFS_OPS(bch2_dev); @@ -1008,7 +1016,9 @@ struct attribute *bch2_dev_files[] = { &sysfs_label, &sysfs_has_data, - &sysfs_iodone, + &sysfs_io_done, + &sysfs_io_errors, + &sysfs_io_errors_reset, &sysfs_io_latency_read, &sysfs_io_latency_write, -- cgit v1.2.3 From f5d26fa31ed2e260589f0bc8af010bb742f1231e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 25 Oct 2023 15:51:16 -0400 Subject: bcachefs: bch_sb_field_errors Add a new superblock section to keep counts of errors seen since filesystem creation: we'll be adding counters for every distinct fsck error. The new superblock section has entries of the form [ id, count, time_of_last_error ]; this is intended to let us see what errors are occurring - and getting fixed - via show-super output.
Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/bcachefs.h | 14 ++-- fs/bcachefs/bcachefs_format.h | 14 +++- fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.c | 22 +++--- fs/bcachefs/sb-errors.c | 175 ++++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/sb-errors.h | 26 +++++++ fs/bcachefs/sb-errors_types.h | 16 ++++ fs/bcachefs/sb-members.c | 2 +- fs/bcachefs/sb-members.h | 2 +- fs/bcachefs/super-io.c | 3 + fs/bcachefs/super-io.h | 5 ++ fs/bcachefs/super.c | 12 ++- 13 files changed, 270 insertions(+), 23 deletions(-) create mode 100644 fs/bcachefs/sb-errors.c create mode 100644 fs/bcachefs/sb-errors.h create mode 100644 fs/bcachefs/sb-errors_types.h (limited to 'fs') diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 0749731b9072..45b64f89258c 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -70,6 +70,7 @@ bcachefs-y := \ reflink.o \ replicas.o \ sb-clean.o \ + sb-errors.o \ sb-members.o \ siphash.o \ six.o \ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 0ae14a69dfde..9cb8684959ee 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -209,6 +209,7 @@ #include "nocow_locking_types.h" #include "opts.h" #include "recovery_types.h" +#include "sb-errors_types.h" #include "seqmutex.h" #include "util.h" @@ -992,11 +993,6 @@ struct bch_fs { struct bio_set dio_read_bioset; struct bio_set nocow_flush_bioset; - /* ERRORS */ - struct list_head fsck_errors; - struct mutex fsck_error_lock; - bool fsck_alloc_err; - /* QUOTAS */ struct bch_memquota_type quotas[QTYP_NR]; @@ -1045,6 +1041,14 @@ struct bch_fs { struct bch2_time_stats times[BCH_TIME_STAT_NR]; struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; + + /* ERRORS */ + struct list_head fsck_error_msgs; + struct mutex fsck_error_msgs_lock; + bool fsck_alloc_msgs_err; + + bch_sb_errors_cpu fsck_error_counts; + struct mutex fsck_error_counts_lock; }; extern struct wait_queue_head bch2_read_only_wait; diff --git 
a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index dbde425b4e76..29b000c6b7e1 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1218,7 +1218,8 @@ struct bch_sb_field { x(journal_seq_blacklist, 8) \ x(journal_v2, 9) \ x(counters, 10) \ - x(members_v2, 11) + x(members_v2, 11) \ + x(errors, 12) enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, @@ -1621,6 +1622,17 @@ struct bch_sb_field_journal_seq_blacklist { __u64 _data[]; }; +struct bch_sb_field_errors { + struct bch_sb_field field; + struct bch_sb_field_error_entry { + __le64 v; + __le64 last_error_time; + } entries[]; +}; + +LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16); +LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64); + /* Superblock: */ /* diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 3e9f09cea6c7..2a11f32cf30a 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -213,6 +213,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \ x(BCH_ERR_invalid_sb, invalid_sb_clean) \ x(BCH_ERR_invalid_sb, invalid_sb_quota) \ + x(BCH_ERR_invalid_sb, invalid_sb_errors) \ x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 4dbfe31197bc..d759afc910fc 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -117,27 +117,27 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) return NULL; - list_for_each_entry(s, &c->fsck_errors, list) + list_for_each_entry(s, &c->fsck_error_msgs, list) if (s->fmt == fmt) { /* * move it to the head of the list: repeated fsck errors * are common */ - list_move(&s->list, &c->fsck_errors); + list_move(&s->list, &c->fsck_error_msgs); return s; } s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) { - if (!c->fsck_alloc_err) + if 
(!c->fsck_alloc_msgs_err) bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); - c->fsck_alloc_err = true; + c->fsck_alloc_msgs_err = true; return NULL; } INIT_LIST_HEAD(&s->list); s->fmt = fmt; - list_add(&s->list, &c->fsck_errors); + list_add(&s->list, &c->fsck_error_msgs); return s; } @@ -153,7 +153,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) prt_vprintf(out, fmt, args); va_end(args); - mutex_lock(&c->fsck_error_lock); + mutex_lock(&c->fsck_error_msgs_lock); s = fsck_err_get(c, fmt); if (s) { /* @@ -163,7 +163,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) */ if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { ret = s->ret; - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); printbuf_exit(&buf); return ret; } @@ -258,7 +258,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) if (s) s->ret = ret; - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); printbuf_exit(&buf); @@ -279,9 +279,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; - mutex_lock(&c->fsck_error_lock); + mutex_lock(&c->fsck_error_msgs_lock); - list_for_each_entry_safe(s, n, &c->fsck_errors, list) { + list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { if (s->ratelimited && s->last_msg) bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); @@ -290,5 +290,5 @@ void bch2_flush_fsck_errs(struct bch_fs *c) kfree(s); } - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); } diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c new file mode 100644 index 000000000000..3d66f15ae8f5 --- /dev/null +++ b/fs/bcachefs/sb-errors.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "sb-errors.h" +#include "super-io.h" + +static const char * const bch2_sb_error_strs[] = { +#define x(t, n, ...) 
[n] = #t, + BCH_SB_ERRS() + NULL +}; + +static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id) +{ + if (id < BCH_SB_ERR_MAX) + prt_str(out, bch2_sb_error_strs[id]); + else + prt_printf(out, "(unknown error %u)", id); +} + +static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e) +{ + return e + ? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0]) + : 0; +} + +static inline unsigned bch2_sb_field_errors_u64s(unsigned nr) +{ + return (sizeof(struct bch_sb_field_errors) + + sizeof(struct bch_sb_field_error_entry) * nr) / sizeof(u64); +} + +static int bch2_sb_errors_validate(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) +{ + struct bch_sb_field_errors *e = field_to_type(f, errors); + unsigned i, nr = bch2_sb_field_errors_nr_entries(e); + + for (i = 0; i < nr; i++) { + if (!BCH_SB_ERROR_ENTRY_NR(&e->entries[i])) { + prt_printf(err, "entry with count 0 (id "); + bch2_sb_error_id_to_text(err, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); + prt_printf(err, ")"); + return -BCH_ERR_invalid_sb_errors; + } + + if (i + 1 < nr && + BCH_SB_ERROR_ENTRY_ID(&e->entries[i]) >= + BCH_SB_ERROR_ENTRY_ID(&e->entries[i + 1])) { + prt_printf(err, "entries out of order"); + return -BCH_ERR_invalid_sb_errors; + } + } + + return 0; +} + +static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_errors *e = field_to_type(f, errors); + unsigned i, nr = bch2_sb_field_errors_nr_entries(e); + u64 now = ktime_get_real_seconds(); + + if (out->nr_tabstops <= 1) + printbuf_tabstop_push(out, 16); + + for (i = 0; i < nr; i++) { + bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(&e->entries[i])); + prt_tab(out); + prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i])); + prt_tab(out); + bch2_pr_time_units(out, (now - le64_to_cpu(e->entries[i].last_error_time)) * + NSEC_PER_SEC); + prt_str(out, " ago"); + prt_newline(out); + } +} + +const struct 
bch_sb_field_ops bch_sb_field_ops_errors = { + .validate = bch2_sb_errors_validate, + .to_text = bch2_sb_errors_to_text, +}; + +void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err) +{ + bch_sb_errors_cpu *e = &c->fsck_error_counts; + struct bch_sb_error_entry_cpu n = { + .id = err, + .nr = 1, + .last_error_time = ktime_get_real_seconds() + }; + unsigned i; + + mutex_lock(&c->fsck_error_counts_lock); + for (i = 0; i < e->nr; i++) { + if (err == e->data[i].id) { + e->data[i].nr++; + e->data[i].last_error_time = n.last_error_time; + goto out; + } + if (err < e->data[i].id) + break; + } + + if (darray_make_room(e, 1)) + goto out; + + darray_insert_item(e, i, n); +out: + mutex_unlock(&c->fsck_error_counts_lock); +} + +void bch2_sb_errors_from_cpu(struct bch_fs *c) +{ + bch_sb_errors_cpu *src = &c->fsck_error_counts; + struct bch_sb_field_errors *dst = + bch2_sb_field_resize(&c->disk_sb, errors, + bch2_sb_field_errors_u64s(src->nr)); + unsigned i; + + if (!dst) + return; + + for (i = 0; i < src->nr; i++) { + SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); + SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); + dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); + } +} + +static int bch2_sb_errors_to_cpu(struct bch_fs *c) +{ + struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors); + bch_sb_errors_cpu *dst = &c->fsck_error_counts; + unsigned i, nr = bch2_sb_field_errors_nr_entries(src); + int ret; + + if (!nr) + return 0; + + mutex_lock(&c->fsck_error_counts_lock); + ret = darray_make_room(dst, nr); + if (ret) + goto err; + + dst->nr = nr; + + for (i = 0; i < nr; i++) { + dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]); + dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]); + dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time); + } +err: + mutex_unlock(&c->fsck_error_counts_lock); + + return ret; +} + +void bch2_fs_sb_errors_exit(struct bch_fs *c) 
+{ + darray_exit(&c->fsck_error_counts); +} + +void bch2_fs_sb_errors_init_early(struct bch_fs *c) +{ + mutex_init(&c->fsck_error_counts_lock); + darray_init(&c->fsck_error_counts); +} + +int bch2_fs_sb_errors_init(struct bch_fs *c) +{ + return bch2_sb_errors_to_cpu(c); +} diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h new file mode 100644 index 000000000000..7f8172821240 --- /dev/null +++ b/fs/bcachefs/sb-errors.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_ERRORS_H +#define _BCACHEFS_SB_ERRORS_H + +#include "sb-errors_types.h" + +#define BCH_SB_ERRS() + +enum bch_sb_error_id { +#define x(t, n) BCH_FSCK_ERR_##t = n, + BCH_SB_ERRS() +#undef x + BCH_SB_ERR_MAX +}; + +extern const struct bch_sb_field_ops bch_sb_field_ops_errors; + +void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id); + +void bch2_sb_errors_from_cpu(struct bch_fs *); + +void bch2_fs_sb_errors_exit(struct bch_fs *); +void bch2_fs_sb_errors_init_early(struct bch_fs *); +int bch2_fs_sb_errors_init(struct bch_fs *); + +#endif /* _BCACHEFS_SB_ERRORS_H */ diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h new file mode 100644 index 000000000000..b1c099843a39 --- /dev/null +++ b/fs/bcachefs/sb-errors_types.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_ERRORS_TYPES_H +#define _BCACHEFS_SB_ERRORS_TYPES_H + +#include "darray.h" + +struct bch_sb_error_entry_cpu { + u64 id:16, + nr:48; + u64 last_error_time; +}; + +typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu; + +#endif /* _BCACHEFS_SB_ERRORS_TYPES_H */ + diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 91566accc5a7..032fe45481d3 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -84,7 +84,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c) return 0; } -int bch2_members_v2_init(struct bch_fs *c) +int bch2_sb_members_v2_init(struct bch_fs *c) { struct 
bch_sb_field_members_v1 *mi1; struct bch_sb_field_members_v2 *mi2; diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 7cfd55a43bb5..1583e80afcbf 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -4,7 +4,7 @@ extern char * const bch2_member_error_strs[]; -int bch2_members_v2_init(struct bch_fs *c); +int bch2_sb_members_v2_init(struct bch_fs *c); int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 64e861b87535..83bdb4368289 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -13,6 +13,7 @@ #include "replicas.h" #include "quota.h" #include "sb-clean.h" +#include "sb-errors.h" #include "sb-members.h" #include "super-io.h" #include "super.h" @@ -897,7 +898,9 @@ int bch2_write_super(struct bch_fs *c) SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); bch2_sb_counters_from_cpu(c); + bch2_sb_members_from_cpu(c); bch2_sb_members_cpy_v2_v1(&c->disk_sb); + bch2_sb_errors_from_cpu(c); for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 5d079dd12f95..f5abd102bff7 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -23,6 +23,11 @@ u64 bch2_upgrade_recovery_passes(struct bch_fs *c, unsigned, unsigned); +static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) +{ + return le32_to_cpu(f->u64s) * sizeof(u64); +} + #define field_to_type(_f, _name) \ container_of_or_null(_f, struct bch_sb_field_##_name, field) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e16b5bc12d26..1b5c2a1bd68a 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -49,6 +49,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "sb-errors.h" #include "sb-members.h" #include "snapshot.h" #include 
"subvolume.h" @@ -400,7 +401,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch_info(c, "going read-write"); - ret = bch2_members_v2_init(c); + ret = bch2_sb_members_v2_init(c); if (ret) goto err; @@ -481,6 +482,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_time_stats_exit(&c->times[i]); bch2_free_pending_node_rewrites(c); + bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); bch2_fs_quota_exit(c); @@ -713,6 +715,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_quota_init(c); bch2_fs_ec_init_early(c); bch2_fs_move_init(c); + bch2_fs_sb_errors_init_early(c); INIT_LIST_HEAD(&c->list); @@ -729,8 +732,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->journal_iters); - INIT_LIST_HEAD(&c->fsck_errors); - mutex_init(&c->fsck_error_lock); + INIT_LIST_HEAD(&c->fsck_error_msgs); + mutex_init(&c->fsck_error_msgs_lock); seqcount_init(&c->gc_pos_lock); @@ -840,6 +843,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) } ret = bch2_fs_counters_init(c) ?: + bch2_fs_sb_errors_init(c) ?: bch2_io_clock_init(&c->io_clock[READ]) ?: bch2_io_clock_init(&c->io_clock[WRITE]) ?: bch2_fs_journal_init(&c->journal) ?: @@ -942,7 +946,7 @@ int bch2_fs_start(struct bch_fs *c) mutex_lock(&c->sb_lock); - ret = bch2_members_v2_init(c); + ret = bch2_sb_members_v2_init(c); if (ret) { mutex_unlock(&c->sb_lock); goto err; -- cgit v1.2.3 From b65db750e2bb9252321fd54c284edd73c1595a09 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Oct 2023 20:44:36 -0400 Subject: bcachefs: Enumerate fsck errors This patch adds a superblock error counter for every distinct fsck error; this means that when analyzing filesystems out in the wild we'll be able to see what sorts of inconsistencies are being found and repaired, and hence what bugs to look for.
Errors validating bkeys are not yet considered distinct fsck errors, but this patch adds a new helper, bkey_fsck_err(), in order to add distinct error types for them as well. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 158 ++++++++++++----------- fs/bcachefs/alloc_background.h | 10 +- fs/bcachefs/backpointers.c | 20 +-- fs/bcachefs/backpointers.h | 2 +- fs/bcachefs/bkey_methods.c | 147 ++++++++++----------- fs/bcachefs/bkey_methods.h | 5 +- fs/bcachefs/btree_gc.c | 124 +++++++++++------- fs/bcachefs/btree_io.c | 174 ++++++++++++++++++------- fs/bcachefs/btree_update_interior.c | 4 +- fs/bcachefs/buckets.c | 13 +- fs/bcachefs/dirent.c | 76 +++++------ fs/bcachefs/dirent.h | 2 +- fs/bcachefs/ec.c | 29 ++--- fs/bcachefs/ec.h | 2 +- fs/bcachefs/error.c | 7 +- fs/bcachefs/error.h | 80 ++++++++---- fs/bcachefs/extents.c | 236 ++++++++++++++++------------------ fs/bcachefs/extents.h | 8 +- fs/bcachefs/fsck.c | 96 ++++++++------ fs/bcachefs/inode.c | 143 +++++++++++---------- fs/bcachefs/inode.h | 8 +- fs/bcachefs/journal_io.c | 73 ++++++++--- fs/bcachefs/lru.c | 18 +-- fs/bcachefs/lru.h | 2 +- fs/bcachefs/quota.c | 15 ++- fs/bcachefs/quota.h | 2 +- fs/bcachefs/recovery.c | 10 +- fs/bcachefs/reflink.c | 6 +- fs/bcachefs/reflink.h | 6 +- fs/bcachefs/sb-clean.c | 3 + fs/bcachefs/sb-errors.h | 246 +++++++++++++++++++++++++++++++++++- fs/bcachefs/snapshot.c | 104 +++++++-------- fs/bcachefs/snapshot.h | 4 +- fs/bcachefs/subvolume.c | 18 +-- fs/bcachefs/subvolume.h | 2 +- fs/bcachefs/xattr.c | 58 ++++----- fs/bcachefs/xattr.h | 2 +- 37 files changed, 1175 insertions(+), 738 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 455ee0b47f31..c342ec3b0385 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -192,114 +192,109 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(const 
struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + int ret = 0; /* allow for unknown fields */ - if (bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v)) { - prt_printf(err, "incorrect value size (%zu < %u)", - bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, + alloc_v1_val_size_bad, + "incorrect value size (%zu < %u)", + bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); +fsck_err: + return ret; } -int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_alloc_unpacked u; + int ret = 0; - if (bch2_alloc_unpack_v2(&u, k)) { - prt_printf(err, "unpack error"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, + alloc_v2_unpack_error, + "unpack error"); +fsck_err: + return ret; } -int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_alloc_unpacked u; + int ret = 0; - if (bch2_alloc_unpack_v3(&u, k)) { - prt_printf(err, "unpack error"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, + alloc_v2_unpack_error, + "unpack error"); +fsck_err: + return ret; } -int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + int ret = 0; - if (alloc_v4_u64s(a.v) > bkey_val_u64s(k.k)) { - prt_printf(err, "bad val size (%u > %zu)", - 
alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err, + alloc_v4_val_size_bad, + "bad val size (%u > %zu)", + alloc_v4_u64s(a.v), bkey_val_u64s(k.k)); - if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) { - prt_printf(err, "invalid backpointers_start"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, + alloc_v4_backpointers_start_bad, + "invalid backpointers_start"); - if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) { - prt_printf(err, "invalid data type (got %u should be %u)", - a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, + alloc_key_data_type_bad, + "invalid data type (got %u should be %u)", + a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); switch (a.v->data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: case BCH_DATA_need_discard: - if (a.v->dirty_sectors || - a.v->cached_sectors || - a.v->stripe) { - prt_printf(err, "empty data type free but have data"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(a.v->dirty_sectors || + a.v->cached_sectors || + a.v->stripe, c, err, + alloc_key_empty_but_have_data, + "empty data type free but have data"); break; case BCH_DATA_sb: case BCH_DATA_journal: case BCH_DATA_btree: case BCH_DATA_user: case BCH_DATA_parity: - if (!a.v->dirty_sectors) { - prt_printf(err, "data_type %s but dirty_sectors==0", - bch2_data_types[a.v->data_type]); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!a.v->dirty_sectors, c, err, + alloc_key_dirty_sectors_0, + "data_type %s but dirty_sectors==0", + bch2_data_types[a.v->data_type]); break; case BCH_DATA_cached: - if (!a.v->cached_sectors || - a.v->dirty_sectors || - a.v->stripe) { - prt_printf(err, "data type 
inconsistency"); - return -BCH_ERR_invalid_bkey; - } - - if (!a.v->io_time[READ] && - c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs) { - prt_printf(err, "cached bucket with read_time == 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!a.v->cached_sectors || + a.v->dirty_sectors || + a.v->stripe, c, err, + alloc_key_cached_inconsistency, + "data type inconsistency"); + + bkey_fsck_err_on(!a.v->io_time[READ] && + c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, + c, err, + alloc_key_cached_but_read_time_zero, + "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: break; } - - return 0; +fsck_err: + return ret; } static inline u64 swab40(u64 x) @@ -521,17 +516,18 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens)) { - prt_printf(err, "bad val size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, + bucket_gens_val_size_bad, + "bad val size (%zu != %zu)", + bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); +fsck_err: + return ret; } void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) @@ -986,6 +982,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, int ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c, + alloc_key_to_missing_dev_bucket, "alloc key for invalid device:bucket %llu:%llu", alloc_k.k->p.inode, alloc_k.k->p.offset)) return bch2_btree_delete_at(trans, alloc_iter, 0); @@ -1005,7 +1002,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (k.k->type != discard_key_type && (c->opts.reconstruct_alloc 
|| - fsck_err(c, "incorrect key in need_discard btree (got %s should be %s)\n" + fsck_err(c, need_discard_key_wrong, + "incorrect key in need_discard btree (got %s should be %s)\n" " %s", bch2_bkey_types[k.k->type], bch2_bkey_types[discard_key_type], @@ -1035,7 +1033,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (k.k->type != freespace_key_type && (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect key in freespace btree (got %s should be %s)\n" + fsck_err(c, freespace_key_wrong, + "incorrect key in freespace btree (got %s should be %s)\n" " %s", bch2_bkey_types[k.k->type], bch2_bkey_types[freespace_key_type], @@ -1066,7 +1065,8 @@ int bch2_check_alloc_key(struct btree_trans *trans, if (a->gen != alloc_gen(k, gens_offset) && (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect gen in bucket_gens btree (got %u should be %u)\n" + fsck_err(c, bucket_gens_key_wrong, + "incorrect gen in bucket_gens btree (got %u should be %u)\n" " %s", alloc_gen(k, gens_offset), a->gen, (printbuf_reset(&buf), @@ -1124,7 +1124,8 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, if (k.k->type != KEY_TYPE_set && (c->opts.reconstruct_alloc || - fsck_err(c, "hole in alloc btree missing in freespace btree\n" + fsck_err(c, freespace_hole_missing, + "hole in alloc btree missing in freespace btree\n" " device %llu buckets %llu-%llu", freespace_iter->pos.inode, freespace_iter->pos.offset, @@ -1187,6 +1188,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, for (i = gens_offset; i < gens_end_offset; i++) { if (fsck_err_on(g.v.gens[i], c, + bucket_gens_hole_wrong, "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)", bucket_gens_pos_to_alloc(k.k->p, i).inode, bucket_gens_pos_to_alloc(k.k->p, i).offset, @@ -1244,6 +1246,7 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr return ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c, + need_discard_freespace_key_to_invalid_dev_bucket, 
"entry in %s btree for nonexistant dev:bucket %llu:%llu", bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset)) goto delete; @@ -1253,6 +1256,7 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr if (fsck_err_on(a->data_type != state || (state == BCH_DATA_free && genbits != alloc_freespace_genbits(*a)), c, + need_discard_freespace_key_bad, "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), bch2_btree_id_str(iter->btree_id), @@ -1320,6 +1324,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, dev_exists = bch2_dev_exists2(c, k.k->p.inode); if (!dev_exists) { if (fsck_err_on(!dev_exists, c, + bucket_gens_to_invalid_dev, "bucket_gens key for invalid device:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); @@ -1330,6 +1335,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, ca = bch_dev_bkey_exists(c, k.k->p.inode); if (fsck_err_on(end <= ca->mi.first_bucket || start >= ca->mi.nbuckets, c, + bucket_gens_to_invalid_buckets, "bucket_gens key for invalid buckets:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); @@ -1338,6 +1344,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, for (b = start; b < ca->mi.first_bucket; b++) if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c, + bucket_gens_nonzero_for_invalid_buckets, "bucket_gens key has nonzero gen for invalid bucket")) { g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; need_update = true; @@ -1345,6 +1352,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, for (b = ca->mi.nbuckets; b < end; b++) if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c, + bucket_gens_nonzero_for_invalid_buckets, "bucket_gens key has nonzero gen for invalid bucket")) { g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; need_update = true; @@ -1495,11 +1503,13 @@ static int 
bch2_check_alloc_to_lru_ref(struct btree_trans *trans, return ret; if (fsck_err_on(!a->io_time[READ], c, + alloc_key_cached_but_read_time_zero, "cached bucket with read_time 0\n" " %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) || fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, + alloc_key_to_missing_lru_entry, "missing lru entry\n" " %s", (printbuf_reset(&buf), diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 97042067d2a9..e1ce38ef052e 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -149,13 +149,13 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -193,7 +193,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git 
a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index e74295c21a03..3b79bde1ce2f 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -37,19 +37,20 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); + int ret = 0; - if (!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) { - prt_str(err, "backpointer at wrong pos"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), + c, err, + backpointer_pos_wrong, + "backpointer at wrong pos"); +fsck_err: + return ret; } void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) @@ -356,6 +357,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ int ret = 0; if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, + backpointer_to_missing_device, "backpointer for missing device:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = bch2_btree_delete_at(trans, bp_iter, 0); @@ -369,6 +371,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ goto out; if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c, + backpointer_to_missing_alloc, "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", alloc_iter.pos.inode, alloc_iter.pos.offset, (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { @@ -460,7 +463,7 @@ missing: if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers || c->opts.reconstruct_alloc || - fsck_err(c, "%s", buf.buf)) + fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); 
goto out; @@ -793,6 +796,7 @@ static int check_one_backpointer(struct btree_trans *trans, } if (fsck_err_on(!k.k, c, + backpointer_to_missing_ptr, "backpointer for missing extent\n %s", (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 547e0617602a..4ab9f3562912 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -7,7 +7,7 @@ #include "buckets.h" #include "super.h" -int bch2_backpointer_invalid(const struct bch_fs *, struct bkey_s_c k, +int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, enum bkey_invalid_flags, struct printbuf *); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index baf491878bf1..2f518d7e1a64 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -26,7 +26,7 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -40,23 +40,24 @@ static int deleted_key_invalid(const struct bch_fs *c, struct bkey_s_c k, .key_invalid = deleted_key_invalid, \ }) -static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_bytes(k.k)) { - prt_printf(err, "incorrect value size (%zu != 0)", - bkey_val_bytes(k.k)); - return -BCH_ERR_invalid_bkey; - } - - return 0; + int ret = 0; + + bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, + bkey_val_size_nonzero, + "incorrect value size (%zu != 0)", + bkey_val_bytes(k.k)); 
+fsck_err: + return ret; } #define bch2_bkey_ops_error ((struct bkey_ops) { \ .key_invalid = empty_val_key_invalid, \ }) -static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -71,7 +72,7 @@ static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k, .key_invalid = empty_val_key_invalid, \ }) -static int key_type_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, +static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { return 0; @@ -92,18 +93,6 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, .val_to_text = key_type_inline_data_to_text, \ }) -static int key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k, - enum bkey_invalid_flags flags, struct printbuf *err) -{ - if (bkey_val_bytes(k.k)) { - prt_printf(err, "incorrect value size (%zu != %zu)", - bkey_val_bytes(k.k), sizeof(struct bch_cookie)); - return -BCH_ERR_invalid_bkey; - } - - return 0; -} - static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) { bch2_key_resize(l.k, l.k->size + r.k->size); @@ -111,7 +100,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = key_type_set_invalid, \ + .key_invalid = empty_val_key_invalid, \ .key_merge = key_type_set_merge, \ }) @@ -129,17 +118,19 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); + int ret = 0; - if (bkey_val_bytes(k.k) < ops->min_val_size) { - prt_printf(err, "bad val size (%zu < %u)", - bkey_val_bytes(k.k), ops->min_val_size); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, 
c, err, + bkey_val_size_too_small, + "bad val size (%zu < %u)", + bkey_val_bytes(k.k), ops->min_val_size); if (!ops->key_invalid) return 0; - return ops->key_invalid(c, k, flags, err); + ret = ops->key_invalid(c, k, flags, err); +fsck_err: + return ret; } static u64 bch2_key_types_allowed[] = { @@ -162,61 +153,55 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->u64s < BKEY_U64s) { - prt_printf(err, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; + + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, + bkey_u64s_too_small, + "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); if (type >= BKEY_TYPE_NR) return 0; - if (flags & BKEY_INVALID_COMMIT && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type))) { - prt_printf(err, "invalid key type for btree %s (%s)", - bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, + bkey_invalid_type_for_btree, + "invalid key type for btree %s (%s)", + bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - if (k.k->size == 0) { - prt_printf(err, "size == 0"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->size > k.k->p.offset) { - prt_printf(err, "size greater than offset (%u > %llu)", - k.k->size, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->size == 0, c, err, + bkey_extent_size_zero, + "size == 0"); + + bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, + bkey_extent_size_greater_than_offset, + "size greater than offset (%u > %llu)", + k.k->size, k.k->p.offset); } else { - if (k.k->size) { - prt_printf(err, "size != 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->size, c, err, + bkey_size_nonzero, + "size != 0"); } if (type 
!= BKEY_TYPE_btree) { enum btree_id btree = type - 1; - if (!btree_type_has_snapshots(btree) && - k.k->p.snapshot) { - prt_printf(err, "nonzero snapshot"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!btree_type_has_snapshots(btree) && + k.k->p.snapshot, c, err, + bkey_snapshot_nonzero, + "nonzero snapshot"); - if (btree_type_has_snapshots(btree) && - !k.k->p.snapshot) { - prt_printf(err, "snapshot == 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(btree_type_has_snapshots(btree) && + !k.k->p.snapshot, c, err, + bkey_snapshot_zero, + "snapshot == 0"); - if (bkey_eq(k.k->p, POS_MAX)) { - prt_printf(err, "key at POS_MAX"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, + bkey_at_pos_max, + "key at POS_MAX"); } - - return 0; +fsck_err: + return ret; } int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, @@ -228,20 +213,20 @@ int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bch2_bkey_val_invalid(c, k, flags, err); } -int bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k, - struct printbuf *err) +int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, struct printbuf *err) { - if (bpos_lt(k.k->p, b->data->min_key)) { - prt_printf(err, "key before start of btree node"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - if (bpos_gt(k.k->p, b->data->max_key)) { - prt_printf(err, "key past end of btree node"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, + bkey_before_start_of_btree_node, + "key before start of btree node"); - return 0; + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, + bkey_after_end_of_btree_node, + "key past end of btree node"); +fsck_err: + return ret; } void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index c829c8e381a7..3a370b7087ac 100644 --- a/fs/bcachefs/bkey_methods.h +++ 
b/fs/bcachefs/bkey_methods.h @@ -21,7 +21,7 @@ extern const struct bkey_ops bch2_bkey_null_ops; * being read or written; more aggressive checks can be enabled when rw == WRITE. */ struct bkey_ops { - int (*key_invalid)(const struct bch_fs *c, struct bkey_s_c k, + int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -55,7 +55,8 @@ int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, enum bkey_invalid_flags, struct printbuf *); int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, enum bkey_invalid_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c, struct printbuf *); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, + struct bkey_s_c, struct printbuf *); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 53d1d1da2640..0b5d09c8475d 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -95,15 +95,15 @@ static int bch2_gc_check_topology(struct bch_fs *c, bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k)); if (__fsck_err(c, - FSCK_CAN_FIX| - FSCK_CAN_IGNORE| - FSCK_NO_RATELIMIT, - "btree node with incorrect min_key at btree %s level %u:\n" - " prev %s\n" - " cur %s", - bch2_btree_id_str(b->c.btree_id), b->c.level, - buf1.buf, buf2.buf) && - should_restart_for_topology_repair(c)) { + FSCK_CAN_FIX| + FSCK_CAN_IGNORE| + FSCK_NO_RATELIMIT, + btree_node_topology_bad_min_key, + "btree node with incorrect min_key at btree %s level %u:\n" + " prev %s\n" + " cur %s", + bch2_btree_id_str(b->c.btree_id), b->c.level, + buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); ret = bch2_run_explicit_recovery_pass(c, 
BCH_RECOVERY_PASS_check_topology); goto err; @@ -122,10 +122,8 @@ static int bch2_gc_check_topology(struct bch_fs *c, bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k)); bch2_bpos_to_text(&buf2, node_end); - if (__fsck_err(c, - FSCK_CAN_FIX| - FSCK_CAN_IGNORE| - FSCK_NO_RATELIMIT, + if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", @@ -287,6 +285,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key, cur->data->min_key), c, + btree_node_topology_overwritten_by_next_node, "btree node overwritten by next node at btree %s level %u:\n" " node %s\n" " next %s", @@ -298,6 +297,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p, bpos_predecessor(cur->data->min_key)), c, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " node %s\n" " next %s", @@ -310,6 +310,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, if (mustfix_fsck_err_on(bpos_ge(expected_start, cur->data->max_key), c, + btree_node_topology_overwritten_by_prev_node, "btree node overwritten by prev node at btree %s level %u:\n" " prev %s\n" " node %s", @@ -320,6 +321,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, } if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c, + btree_node_topology_bad_min_key, "btree node with incorrect min_key at btree %s level %u:\n" " prev %s\n" " node %s", @@ -344,6 +346,7 @@ static int btree_repair_node_end(struct bch_fs *c, struct btree *b, bch2_bpos_to_text(&buf2, b->key.k.p); if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c, + btree_node_topology_bad_max_key, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", @@ -396,6 
+399,7 @@ again: bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); if (mustfix_fsck_err_on(ret == -EIO, c, + btree_node_unreadable, "Topology repair: unreadable btree node at btree %s level %u:\n" " %s", bch2_btree_id_str(b->c.btree_id), @@ -504,6 +508,7 @@ again: bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); if (mustfix_fsck_err_on(!have_child, c, + btree_node_topology_interior_node_empty, "empty interior btree node at btree %s level %u\n" " %s", bch2_btree_id_str(b->c.btree_id), @@ -582,7 +587,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (!g->gen_valid && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" + fsck_err(c, ptr_to_missing_alloc_key, + "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -599,7 +605,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (gen_cmp(p.ptr.gen, g->gen) > 0 && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" + fsck_err(c, ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -620,7 +627,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" + fsck_err(c, ptr_gen_newer_than_bucket_gen, + "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -631,7 +639,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (!p.ptr.cached 
&& gen_cmp(p.ptr.gen, g->gen) < 0 && (c->opts.reconstruct_alloc || - fsck_err(c, "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" + fsck_err(c, stale_dirty_ptr, + "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], @@ -645,6 +654,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id if (fsck_err_on(bucket_data_type(g->data_type) && bucket_data_type(g->data_type) != data_type, c, + ptr_bucket_data_type_mismatch, "bucket %u:%zu different types of data in same bucket: %s, %s\n" "while marking %s", p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), @@ -664,6 +674,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); if (fsck_err_on(!m || !m->alive, c, + ptr_to_missing_stripe, "pointer to nonexistent stripe %llu\n" "while marking %s", (u64) p.ec.idx, @@ -672,6 +683,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id do_update = true; if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), c, + ptr_to_incorrect_stripe, "pointer does not match stripe %llu\n" "while marking %s", (u64) p.ec.idx, @@ -811,6 +823,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, goto err; if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c, + bkey_version_in_future, "key version number higher than recorded: %llu > %llu", k->k->version.lo, atomic64_read(&c->key_version))) @@ -968,6 +981,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b FSCK_CAN_FIX| FSCK_CAN_IGNORE| FSCK_NO_RATELIMIT, + btree_node_read_error, "Unreadable btree node at btree %s level %u:\n" " %s", bch2_btree_id_str(b->c.btree_id), @@ -1025,6 +1039,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->data->min_key); if 
(mustfix_fsck_err_on(!bpos_eq(b->data->min_key, POS_MIN), c, + btree_root_bad_min_key, "btree root with incorrect min_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = -BCH_ERR_fsck_repair_unimplemented; @@ -1034,6 +1049,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->data->max_key); if (mustfix_fsck_err_on(!bpos_eq(b->data->max_key, SPOS_MAX), c, + btree_root_bad_max_key, "btree root with incorrect max_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = -BCH_ERR_fsck_repair_unimplemented; @@ -1207,16 +1223,16 @@ static int bch2_gc_done(struct bch_fs *c, percpu_down_write(&c->mark_lock); -#define copy_field(_f, _msg, ...) \ +#define copy_field(_err, _f, _msg, ...) \ if (dst->_f != src->_f && \ (!verify || \ - fsck_err(c, _msg ": got %llu, should be %llu" \ + fsck_err(c, _err, _msg ": got %llu, should be %llu" \ , ##__VA_ARGS__, dst->_f, src->_f))) \ dst->_f = src->_f -#define copy_dev_field(_f, _msg, ...) \ - copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) -#define copy_fs_field(_f, _msg, ...) \ - copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) +#define copy_dev_field(_err, _f, _msg, ...) \ + copy_field(_err, _f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) +#define copy_fs_field(_err, _f, _msg, ...) 
\ + copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__) for (i = 0; i < ARRAY_SIZE(c->usage); i++) bch2_fs_usage_acc_to_base(c, i); @@ -1227,13 +1243,17 @@ static int bch2_gc_done(struct bch_fs *c, bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc, dev_usage_u64s()); - copy_dev_field(buckets_ec, "buckets_ec"); - for (i = 0; i < BCH_DATA_NR; i++) { - copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]); - copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]); - copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]); + copy_dev_field(dev_usage_buckets_wrong, + d[i].buckets, "%s buckets", bch2_data_types[i]); + copy_dev_field(dev_usage_sectors_wrong, + d[i].sectors, "%s sectors", bch2_data_types[i]); + copy_dev_field(dev_usage_fragmented_wrong, + d[i].fragmented, "%s fragmented", bch2_data_types[i]); } + + copy_dev_field(dev_usage_buckets_ec_wrong, + buckets_ec, "buckets_ec"); } { @@ -1242,17 +1262,24 @@ static int bch2_gc_done(struct bch_fs *c, struct bch_fs_usage *src = (void *) bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr); - copy_fs_field(hidden, "hidden"); - copy_fs_field(btree, "btree"); + copy_fs_field(fs_usage_hidden_wrong, + hidden, "hidden"); + copy_fs_field(fs_usage_btree_wrong, + btree, "btree"); if (!metadata_only) { - copy_fs_field(data, "data"); - copy_fs_field(cached, "cached"); - copy_fs_field(reserved, "reserved"); - copy_fs_field(nr_inodes,"nr_inodes"); + copy_fs_field(fs_usage_data_wrong, + data, "data"); + copy_fs_field(fs_usage_cached_wrong, + cached, "cached"); + copy_fs_field(fs_usage_reserved_wrong, + reserved, "reserved"); + copy_fs_field(fs_usage_nr_inodes_wrong, + nr_inodes,"nr_inodes"); for (i = 0; i < BCH_REPLICAS_MAX; i++) - copy_fs_field(persistent_reserved[i], + copy_fs_field(fs_usage_persistent_reserved_wrong, + persistent_reserved[i], "persistent_reserved[%i]", i); } @@ -1268,7 +1295,8 @@ static int bch2_gc_done(struct bch_fs *c, printbuf_reset(&buf); bch2_replicas_entry_to_text(&buf, 
e); - copy_fs_field(replicas[i], "%s", buf.buf); + copy_fs_field(fs_usage_replicas_wrong, + replicas[i], "%s", buf.buf); } } @@ -1404,6 +1432,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, if (c->opts.reconstruct_alloc || fsck_err_on(new.data_type != gc.data_type, c, + alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" ": got %s, should be %s", iter->pos.inode, iter->pos.offset, @@ -1412,9 +1441,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans, bch2_data_types[gc.data_type])) new.data_type = gc.data_type; -#define copy_bucket_field(_f) \ +#define copy_bucket_field(_errtype, _f) \ if (c->opts.reconstruct_alloc || \ - fsck_err_on(new._f != gc._f, c, \ + fsck_err_on(new._f != gc._f, c, _errtype, \ "bucket %llu:%llu gen %u data type %s has wrong " #_f \ ": got %u, should be %u", \ iter->pos.inode, iter->pos.offset, \ @@ -1423,11 +1452,16 @@ static int bch2_alloc_write_key(struct btree_trans *trans, new._f, gc._f)) \ new._f = gc._f; \ - copy_bucket_field(gen); - copy_bucket_field(dirty_sectors); - copy_bucket_field(cached_sectors); - copy_bucket_field(stripe_redundancy); - copy_bucket_field(stripe); + copy_bucket_field(alloc_key_gen_wrong, + gen); + copy_bucket_field(alloc_key_dirty_sectors_wrong, + dirty_sectors); + copy_bucket_field(alloc_key_cached_sectors_wrong, + cached_sectors); + copy_bucket_field(alloc_key_stripe_wrong, + stripe); + copy_bucket_field(alloc_key_stripe_redundancy_wrong, + stripe_redundancy); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) @@ -1584,6 +1618,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, } if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c, + reflink_v_refcount_wrong, "reflink key has wrong refcount:\n" " %s\n" " should be %u", @@ -1709,7 +1744,8 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, if (bad) bch2_bkey_val_to_text(&buf, c, k); - if (fsck_err_on(bad, c, "%s", buf.buf)) { + if (fsck_err_on(bad, c, 
stripe_sector_count_wrong, + "%s", buf.buf)) { struct bkey_i_stripe *new; new = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index f398c8095f07..4d2d6f93500d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -530,7 +530,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_str(out, ": "); } -__printf(8, 9) +__printf(9, 10) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, @@ -538,6 +538,7 @@ static int __btree_err(int ret, struct bset *i, int write, bool have_retry, + enum bch_sb_error_id err_type, const char *fmt, ...) { struct printbuf out = PRINTBUF; @@ -562,9 +563,15 @@ static int __btree_err(int ret, if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = -BCH_ERR_btree_node_read_err_bad_node; + if (ret != -BCH_ERR_btree_node_read_err_fixable) + bch2_sb_error_count(c, err_type); + switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: - mustfix_fsck_err(c, "%s", out.buf); + ret = bch2_fsck_err(c, FSCK_CAN_FIX, err_type, "%s", out.buf); + if (ret != -BCH_ERR_fsck_fix && + ret != -BCH_ERR_fsck_ignore) + goto fsck_err; ret = -BCH_ERR_fsck_fix; break; case -BCH_ERR_btree_node_read_err_want_retry: @@ -589,9 +596,11 @@ fsck_err: return ret; } -#define btree_err(type, c, ca, b, i, msg, ...) \ +#define btree_err(type, c, ca, b, i, _err_type, msg, ...) 
\ ({ \ - int _ret = __btree_err(type, c, ca, b, i, write, have_retry, msg, ##__VA_ARGS__);\ + int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \ + BCH_FSCK_ERR_##_err_type, \ + msg, ##__VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix) { \ ret = _ret; \ @@ -666,13 +675,17 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, int ret = 0; btree_err_on(!bch2_version_compatible(version), - -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, + -BCH_ERR_btree_node_read_err_incompatible, + c, ca, b, i, + btree_node_unsupported_version, "unsupported bset version %u.%u", BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); if (btree_err_on(version < c->sb.version_min, - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { mutex_lock(&c->sb_lock); @@ -683,7 +696,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, if (btree_err_on(BCH_VERSION_MAJOR(version) > BCH_VERSION_MAJOR(c->sb.version), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bset_newer_than_sb, "bset version %u newer than superblock version %u", version, c->sb.version)) { mutex_lock(&c->sb_lock); @@ -693,11 +708,15 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(BSET_SEPARATE_WHITEOUTS(i), - -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, + -BCH_ERR_btree_node_read_err_incompatible, + c, ca, b, i, + btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); if (btree_err_on(offset + sectors > btree_sectors(c), - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_past_end_of_btree_node, "bset past end of btree node")) { i->u64s = 0; ret = 0; @@ -705,12 +724,15 @@ static int 
validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(offset && !i->u64s, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_empty, "empty bset"); - btree_err_on(BSET_OFFSET(i) && - BSET_OFFSET(i) != offset, - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_wrong_sector_offset, "bset at wrong sector offset"); if (!offset) { @@ -724,16 +746,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, /* XXX endianness */ btree_err_on(bp->seq != bn->keys.seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + bset_bad_seq, "incorrect sequence number (wrong btree node)"); } btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_btree, "incorrect btree id"); btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_level, "incorrect level"); if (!write) @@ -750,7 +778,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_min_key, "incorrect min_key: got %s should be %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), @@ -759,7 +789,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, } btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, i, + btree_node_bad_max_key, "incorrect max key %s", 
(printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); @@ -769,7 +801,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BSET_BIG_ENDIAN(i), write, bn); btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), - -BCH_ERR_btree_node_read_err_bad_node, c, ca, b, i, + -BCH_ERR_btree_node_read_err_bad_node, + c, ca, b, i, + btree_node_bad_format, "invalid bkey format: %s\n %s", buf1.buf, (printbuf_reset(&buf2), bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf)); @@ -792,7 +826,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b, struct printbuf *err) { return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(b, k, err) : 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); } @@ -813,14 +847,18 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bkey tmp; if (btree_err_on(bkey_p_next(k) > vstruct_last(i), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_past_bset_end, "key extends past end of bset")) { i->u64s = cpu_to_le16((u64 *) k - i->_data); break; } if (btree_err_on(k->format > KEY_FORMAT_CURRENT, - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_bad_format, "invalid bkey format %u", k->format)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), @@ -839,12 +877,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, printbuf_reset(&buf); if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { printbuf_reset(&buf); - prt_printf(&buf, "invalid bkey: "); bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, u.s_c); - 
btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf); + btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bad_bkey, + "invalid bkey: %s", buf.buf); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), @@ -868,7 +908,10 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, bch2_dump_bset(c, b, i, 0); - if (btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf)) { + if (btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bkey_out_of_order, + "%s", buf.buf)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); @@ -909,26 +952,34 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2); if (bch2_meta_read_fault("btree")) - btree_err(-BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + btree_err(-BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_fault_injected, "dynamic fault"); btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_magic, "bad magic: want %llx, got %llx", bset_magic(c), le64_to_cpu(b->data->magic)); - btree_err_on(!b->data->keys.seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, - "bad btree header: seq 0"); - if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; btree_err_on(b->data->keys.seq != bp->seq, - -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + btree_node_bad_seq, "got wrong btree node (seq %llx want %llx)", b->data->keys.seq, bp->seq); + } else { + btree_err_on(!b->data->keys.seq, + -BCH_ERR_btree_node_read_err_must_retry, + c, ca, b, NULL, + 
btree_node_bad_seq, + "bad btree header: seq 0"); } while (b->written < (ptr_written ?: btree_sectors(c))) { @@ -941,9 +992,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, i = &b->data->keys; btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, - "unknown checksum type %llu", - BSET_CSUM_TYPE(i)); + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_unknown_csum, + "unknown checksum type %llu", BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); @@ -953,7 +1005,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); btree_err_on(csum_bad, - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_bad_csum, "invalid checksum"); ret = bset_encrypt(c, i, b->written << 9); @@ -963,7 +1017,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), - -BCH_ERR_btree_node_read_err_incompatible, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_incompatible, + c, NULL, b, NULL, + btree_node_unsupported_version, "btree node does not have NEW_EXTENT_OVERWRITE set"); sectors = vstruct_sectors(b->data, c->block_bits); @@ -975,9 +1031,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, break; btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, - "unknown checksum type %llu", - BSET_CSUM_TYPE(i)); + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_unknown_csum, + "unknown checksum type %llu", BSET_CSUM_TYPE(i)); nonce = btree_nonce(i, b->written << 9); csum_bad = bch2_crc_cmp(bne->csum, @@ -986,7 +1043,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); 
btree_err_on(csum_bad, - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, i, + bset_bad_csum, "invalid checksum"); ret = bset_encrypt(c, i, b->written << 9); @@ -1019,12 +1078,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, true); btree_err_on(blacklisted && first, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + bset_blacklisted_journal_seq, "first btree node bset has blacklisted journal seq (%llu)", le64_to_cpu(i->journal_seq)); btree_err_on(blacklisted && ptr_written, - -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, + -BCH_ERR_btree_node_read_err_fixable, + c, ca, b, i, + first_bset_blacklisted_journal_seq, "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", le64_to_cpu(i->journal_seq), b->written, b->written + sectors, ptr_written); @@ -1041,7 +1104,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (ptr_written) { btree_err_on(b->written < ptr_written, - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, NULL, + btree_node_data_missing, "btree node data missing: expected %u sectors, found %u", ptr_written, b->written); } else { @@ -1052,7 +1117,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, !bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), true), - -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, + -BCH_ERR_btree_node_read_err_want_retry, + c, ca, b, NULL, + btree_node_bset_after_end, "found bset signature after last bset"); } @@ -1094,7 +1161,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, u.s_c); - btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, "%s", buf.buf); + btree_err(-BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, i, + btree_node_bad_bkey, 
+ "%s", buf.buf); btree_keys_account_key_drop(&b->nr, 0, k); @@ -1320,14 +1390,20 @@ static void btree_node_read_all_replicas_done(struct closure *cl) } written2 = btree_node_sectors_written(c, ra->buf[i]); - if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_replicas_sectors_written_mismatch, "btree node sectors written mismatch: %u != %u", written, written2) || btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_bset_after_end, "found bset signature after last bset") || btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), - -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, + -BCH_ERR_btree_node_read_err_fixable, + c, NULL, b, NULL, + btree_node_replicas_data_mismatch, "btree node replicas content mismatch")) dump_bset_maps = true; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 7dbf6b6c7f34..818a83f35d27 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1274,14 +1274,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf)) { + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { printbuf_reset(&buf); prt_printf(&buf, "inserting invalid bkey\n "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); prt_printf(&buf, "\n "); bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert), &buf); + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); bch2_fs_inconsistent(c, "%s", buf.buf); dump_stack(); diff --git 
a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index a8af803e7289..2acd727d3f9b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -370,8 +370,8 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, idx = bch2_replicas_entry_idx(c, r); if (idx < 0 && - fsck_err(c, "no replicas entry\n" - " while marking %s", + fsck_err(c, ptr_to_missing_replicas_entry, + "no replicas entry\n while marking %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { percpu_up_read(&c->mark_lock); ret = bch2_mark_replicas(c, r); @@ -695,6 +695,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (gen_after(ptr->gen, b_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -707,6 +708,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_too_stale, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -720,6 +722,7 @@ static int check_bucket_ref(struct btree_trans *trans, if (b_gen != ptr->gen && !ptr->cached) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_stale_dirty_ptr, "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -741,6 +744,7 @@ static int check_bucket_ref(struct btree_trans *trans, ptr_data_type && bucket_data_type != ptr_data_type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_ptr_bucket_data_type_mismatch, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -754,6 +758,7 @@ static int check_bucket_ref(struct btree_trans *trans, if ((u64) bucket_sectors + sectors > U32_MAX) { bch2_fsck_err(c, 
FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_bucket_sector_count_overflow, "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" "while marking %s", ptr->dev, bucket_nr, b_gen, @@ -1195,7 +1200,8 @@ static s64 __bch2_mark_reflink_p(struct btree_trans *trans, *idx = r->offset; return 0; not_found: - if (fsck_err(c, "pointer to missing indirect extent\n" + if (fsck_err(c, reflink_p_to_missing_reflink_v, + "pointer to missing indirect extent\n" " %s\n" " missing range %llu-%llu", (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), @@ -1857,6 +1863,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, if (a->v.data_type && type && a->v.data_type != type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + BCH_FSCK_ERR_bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", iter.pos.inode, iter.pos.offset, a->v.gen, diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 6c6c8d57d72b..1a0f2d571569 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -97,61 +97,51 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); + int ret = 0; - if (!d_name.len) { - prt_printf(err, "empty name"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(!d_name.len, c, err, + dirent_empty_name, + "empty name"); - if (bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len)) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, + dirent_val_too_big, + "value too big (%zu > %u)", + 
bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); /* * Check new keys don't exceed the max length * (older keys may be larger.) */ - if ((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX) { - prt_printf(err, "dirent name too big (%u > %u)", - d_name.len, BCH_NAME_MAX); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len != strnlen(d_name.name, d_name.len)) { - prt_printf(err, "dirent has stray data after name's NUL"); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len == 1 && !memcmp(d_name.name, ".", 1)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (d_name.len == 2 && !memcmp(d_name.name, "..", 2)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (memchr(d_name.name, '/', d_name.len)) { - prt_printf(err, "invalid name"); - return -BCH_ERR_invalid_bkey; - } - - if (d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode) { - prt_printf(err, "dirent points to own directory"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err, + dirent_name_too_long, + "dirent name too big (%u > %u)", + d_name.len, BCH_NAME_MAX); + + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, + dirent_name_embedded_nul, + "dirent has stray data after name's NUL"); + + bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, + dirent_name_dot_or_dotdot, + "invalid name"); + + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, + dirent_name_has_slash, + "name with /"); + + bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && + le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, + dirent_to_itself, + "dirent points to own directory"); +fsck_err: + return ret; } void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index e9fa1df38232..cd262bf4d9c5 
100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,7 +7,7 @@ enum bkey_invalid_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index c9795cd98192..5da0e7a69323 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -105,29 +105,26 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + int ret = 0; - if (bkey_eq(k.k->p, POS_MIN)) { - prt_printf(err, "stripe at POS_MIN"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->p.inode) { - prt_printf(err, "nonzero inode field"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || + bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, + stripe_pos_bad, + "stripe at bad pos"); - if (bkey_val_u64s(k.k) < stripe_val_u64s(s)) { - prt_printf(err, "incorrect value size (%zu < %u)", - bkey_val_u64s(k.k), stripe_val_u64s(s)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, + stripe_val_size_bad, + "incorrect value size (%zu < %u)", + bkey_val_u64s(k.k), stripe_val_u64s(s)); - return bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 966d165a3b66..61c67aa0aa49 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,7 +8,7 @@ enum bkey_invalid_flags; -int bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c, +int 
bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index d759afc910fc..7b28d37922fd 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -141,7 +141,10 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) return s; } -int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) +int bch2_fsck_err(struct bch_fs *c, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) { struct fsck_err_state *s = NULL; va_list args; @@ -149,6 +152,8 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; + bch2_sb_error_count(c, err); + va_start(args, fmt); prt_vprintf(out, fmt, args); va_end(args); diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 958b2bed4f39..d167d65986e0 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include #include +#include "sb-errors.h" struct bch_dev; struct bch_fs; @@ -101,18 +102,26 @@ struct fsck_err_state { char *last_msg; }; -#define FSCK_CAN_FIX (1 << 0) -#define FSCK_CAN_IGNORE (1 << 1) -#define FSCK_NEED_FSCK (1 << 2) -#define FSCK_NO_RATELIMIT (1 << 3) +enum bch_fsck_flags { + FSCK_CAN_FIX = 1 << 0, + FSCK_CAN_IGNORE = 1 << 1, + FSCK_NEED_FSCK = 1 << 2, + FSCK_NO_RATELIMIT = 1 << 3, +}; + +#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) -__printf(3, 4) __cold -int bch2_fsck_err(struct bch_fs *, unsigned, const char *, ...); +__printf(4, 5) __cold +int bch2_fsck_err(struct bch_fs *, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); void bch2_flush_fsck_errs(struct bch_fs *); -#define __fsck_err(c, _flags, msg, ...) \ +#define __fsck_err(c, _flags, _err_type, ...) 
\ ({ \ - int _ret = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__); \ + int _ret = bch2_fsck_err(c, _flags, BCH_FSCK_ERR_##_err_type, \ + __VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix && \ _ret != -BCH_ERR_fsck_ignore) { \ @@ -127,26 +136,53 @@ void bch2_flush_fsck_errs(struct bch_fs *); /* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -#define __fsck_err_on(cond, c, _flags, ...) \ - (unlikely(cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false) +#define __fsck_err_on(cond, c, _flags, _err_type, ...) \ + (unlikely(cond) ? __fsck_err(c, _flags, _err_type, __VA_ARGS__) : false) + +#define need_fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) + +#define need_fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, _err_type, __VA_ARGS__) + +#define mustfix_fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) + +#define mustfix_fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_FIX, _err_type, __VA_ARGS__) -#define need_fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) +#define fsck_err(c, _err_type, ...) \ + __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -#define need_fsck_err(c, ...) \ - __fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) +#define fsck_err_on(cond, c, _err_type, ...) \ + __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -#define mustfix_fsck_err(c, ...) \ - __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__) +static inline void bch2_bkey_fsck_err(struct bch_fs *c, + struct printbuf *err_msg, + enum bch_sb_error_id err_type, + const char *fmt, ...) +{ + va_list args; -#define mustfix_fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__) + va_start(args, fmt); + prt_vprintf(err_msg, fmt, args); + va_end(args); -#define fsck_err(c, ...) 
\ - __fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) +} -#define fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) +#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +do { \ + prt_printf(_err_msg, __VA_ARGS__); \ + bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ + ret = -BCH_ERR_invalid_bkey; \ + goto fsck_err; \ +} while (0) + +#define bkey_fsck_err_on(cond, ...) \ +do { \ + if (unlikely(cond)) \ + bkey_fsck_err(__VA_ARGS__); \ +} while (0) /* * Fatal errors: these don't indicate a bug, but we can't continue running in RW diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 0c60d49c3599..a864de231b69 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -163,17 +163,19 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return bch2_bkey_ptrs_invalid(c, k, flags, err); + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, + btree_ptr_val_too_big, + "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); + + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, @@ -182,17 +184,20 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) { - prt_printf(err, 
"value too big (%zu > %zu)", - bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return bch2_bkey_ptrs_invalid(c, k, flags, err); + bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err, + btree_ptr_v2_val_too_big, + "value too big (%zu > %zu)", + bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); + + ret = bch2_bkey_ptrs_invalid(c, k, flags, err); +fsck_err: + return ret; } void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c, @@ -373,19 +378,18 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + int ret = 0; - if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) { - prt_printf(err, "invalid nr_replicas (%u)", - r.v->nr_replicas); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, + reservation_key_nr_replicas_invalid, + "invalid nr_replicas (%u)", r.v->nr_replicas); +fsck_err: + return ret; } void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, @@ -1058,7 +1062,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } -static int extent_ptr_invalid(const struct bch_fs *c, +static int extent_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, const struct bch_extent_ptr *ptr, @@ -1071,6 +1075,7 @@ static int extent_ptr_invalid(const struct bch_fs *c, u64 bucket; u32 bucket_offset; struct bch_dev *ca; + int ret = 0; if (!bch2_dev_exists2(c, ptr->dev)) { /* @@ -1081,41 +1086,33 @@ static int extent_ptr_invalid(const struct bch_fs *c, if (flags & BKEY_INVALID_WRITE) return 0; - prt_printf(err, "pointer to invalid 
device (%u)", ptr->dev); - return -BCH_ERR_invalid_bkey; + bkey_fsck_err(c, err, ptr_to_invalid_device, + "pointer to invalid device (%u)", ptr->dev); } ca = bch_dev_bkey_exists(c, ptr->dev); bkey_for_each_ptr(ptrs, ptr2) - if (ptr != ptr2 && ptr->dev == ptr2->dev) { - prt_printf(err, "multiple pointers to same device (%u)", ptr->dev); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, + ptr_to_duplicate_device, + "multiple pointers to same device (%u)", ptr->dev); bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset); - if (bucket >= ca->mi.nbuckets) { - prt_printf(err, "pointer past last bucket (%llu > %llu)", - bucket, ca->mi.nbuckets); - return -BCH_ERR_invalid_bkey; - } - - if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) { - prt_printf(err, "pointer before first bucket (%llu < %u)", - bucket, ca->mi.first_bucket); - return -BCH_ERR_invalid_bkey; - } - - if (bucket_offset + size_ondisk > ca->mi.bucket_size) { - prt_printf(err, "pointer spans multiple buckets (%u + %u > %u)", + bkey_fsck_err_on(bucket >= ca->mi.nbuckets, c, err, + ptr_after_last_bucket, + "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets); + bkey_fsck_err_on(ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket), c, err, + ptr_before_first_bucket, + "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket); + bkey_fsck_err_on(bucket_offset + size_ondisk > ca->mi.bucket_size, c, err, + ptr_spans_multiple_buckets, + "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, ca->mi.bucket_size); - return -BCH_ERR_invalid_bkey; - } - - return 0; +fsck_err: + return ret; } -int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -1125,24 +1122,22 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, unsigned size_ondisk = 
k.k->size; unsigned nonce = UINT_MAX; unsigned nr_ptrs = 0; - bool unwritten = false, have_ec = false, crc_since_last_ptr = false; - int ret; + bool have_written = false, have_unwritten = false, have_ec = false, crc_since_last_ptr = false; + int ret = 0; if (bkey_is_btree_ptr(k.k)) size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) { - prt_printf(err, "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, + extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); - if (bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry)) { - prt_printf(err, "has non ptr field"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && + !extent_entry_is_ptr(entry), c, err, + btree_ptr_has_non_ptr, + "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: @@ -1151,22 +1146,15 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, if (ret) return ret; - if (nr_ptrs && unwritten != entry->ptr.unwritten) { - prt_printf(err, "extent with unwritten and written ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) { - prt_printf(err, "has unwritten ptrs"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, + ptr_cached_and_erasure_coded, + "cached, erasure coded ptr"); - if (entry->ptr.cached && have_ec) { - prt_printf(err, "cached, erasure coded ptr"); - return -BCH_ERR_invalid_bkey; - } + if (!entry->ptr.unwritten) + have_written = true; + else + have_unwritten = true; - unwritten = entry->ptr.unwritten; have_ec = false; crc_since_last_ptr = false; nr_ptrs++; @@ -1176,52 +1164,41 @@ int bch2_bkey_ptrs_invalid(const 
struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - if (crc.offset + crc.live_size > - crc.uncompressed_size) { - prt_printf(err, "checksum offset + key size > uncompressed size"); - return -BCH_ERR_invalid_bkey; - } - - size_ondisk = crc.compressed_size; - - if (!bch2_checksum_type_valid(c, crc.csum_type)) { - prt_printf(err, "invalid checksum type"); - return -BCH_ERR_invalid_bkey; - } - - if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) { - prt_printf(err, "invalid compression type"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, + ptr_crc_uncompressed_size_too_small, + "checksum offset + key size > uncompressed size"); + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, + ptr_crc_csum_type_unknown, + "invalid checksum type"); + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, + ptr_crc_compression_type_unknown, + "invalid compression type"); if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; - else if (nonce != crc.offset + crc.nonce) { - prt_printf(err, "incorrect nonce"); - return -BCH_ERR_invalid_bkey; - } + else if (nonce != crc.offset + crc.nonce) + bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + "incorrect nonce"); } - if (crc_since_last_ptr) { - prt_printf(err, "redundant crc entry"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(crc_since_last_ptr, c, err, + ptr_crc_redundant, + "redundant crc entry"); crc_since_last_ptr = true; - if (crc_is_encoded(crc) && - (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT))) { - prt_printf(err, "too large encoded extent"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(crc_is_encoded(crc) && + (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && + (flags & 
(BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT)), c, err, + ptr_crc_uncompressed_size_too_big, + "too large encoded extent"); + size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - if (have_ec) { - prt_printf(err, "redundant stripe entry"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(have_ec, c, err, + ptr_stripe_redundant, + "redundant stripe entry"); have_ec = true; break; case BCH_EXTENT_ENTRY_rebalance: { @@ -1238,27 +1215,26 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, } } - if (!nr_ptrs) { - prt_str(err, "no ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (nr_ptrs >= BCH_BKEY_PTRS_MAX) { - prt_str(err, "too many ptrs"); - return -BCH_ERR_invalid_bkey; - } - - if (crc_since_last_ptr) { - prt_printf(err, "redundant crc entry"); - return -BCH_ERR_invalid_bkey; - } - - if (have_ec) { - prt_printf(err, "redundant stripe entry"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(!nr_ptrs, c, err, + extent_ptrs_no_ptrs, + "no ptrs"); + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, + extent_ptrs_too_many_ptrs, + "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); + bkey_fsck_err_on(have_written && have_unwritten, c, err, + extent_ptrs_written_and_unwritten, + "extent with unwritten and written ptrs"); + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, + extent_ptrs_unwritten, + "has unwritten ptrs"); + bkey_fsck_err_on(crc_since_last_ptr, c, err, + extent_ptrs_redundant_crc, + "redundant crc entry"); + bkey_fsck_err_on(have_ec, c, err, + extent_ptrs_redundant_stripe, + "redundant stripe entry"); +fsck_err: + return ret; } void bch2_ptr_swab(struct bkey_s k) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 9110acae7e3c..a2ce8a3be13c 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -400,12 +400,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(const 
struct bch_fs *, struct bkey_s_c, +int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, @@ -445,7 +445,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); @@ -705,7 +705,7 @@ void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *); bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_ptr_swab(struct bkey_s); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 328cb3b3e213..0e470ebd7f10 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -721,8 +721,9 @@ static int check_key_has_snapshot(struct btree_trans *trans, int ret = 0; if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c, - "key in missing snapshot: %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + bkey_in_missing_snapshot, + "key in missing snapshot: %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret = bch2_btree_delete_at(trans, iter, 
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1; fsck_err: @@ -791,6 +792,7 @@ static int hash_check_key(struct btree_trans *trans, if (fsck_err_on(k.k->type == desc.key_type && !desc.cmp_bkey(k, hash_k), c, + hash_table_key_duplicate, "duplicate hash table keys:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), @@ -809,7 +811,8 @@ out: printbuf_exit(&buf); return ret; bad_hash: - if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", + if (fsck_err(c, hash_table_key_wrong_offset, + "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { @@ -860,7 +863,8 @@ static int check_inode(struct btree_trans *trans, *prev = u; if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || - inode_d_type(prev) != inode_d_type(&u), c, + inode_d_type(prev) != inode_d_type(&u), + c, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); return -EINVAL; @@ -888,7 +892,8 @@ static int check_inode(struct btree_trans *trans, if (u.bi_flags & BCH_INODE_UNLINKED && (!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu unlinked", + fsck_err(c, inode_unlinked_but_clean, + "filesystem marked clean, but inode %llu unlinked", u.bi_inum))) { bch2_trans_unlock(trans); bch2_fs_lazy_rw(c); @@ -900,7 +905,8 @@ static int check_inode(struct btree_trans *trans, if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && (!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu has i_size dirty", + fsck_err(c, inode_i_size_dirty_but_clean, + "filesystem marked clean, but inode %llu has i_size dirty", u.bi_inum))) { bch_verbose(c, "truncating inode %llu", u.bi_inum); @@ -932,7 +938,8 @@ static int check_inode(struct btree_trans *trans, if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY && 
(!c->sb.clean || - fsck_err(c, "filesystem marked clean, but inode %llu has i_sectors dirty", + fsck_err(c, inode_i_sectors_dirty_but_clean, + "filesystem marked clean, but inode %llu has i_sectors dirty", u.bi_inum))) { s64 sectors; @@ -1058,10 +1065,11 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) return -BCH_ERR_internal_fsck_err; } - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c, - "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", - w->last_pos.inode, i->snapshot, - i->inode.bi_sectors, i->count)) { + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), + c, inode_i_sectors_wrong, + "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", + w->last_pos.inode, i->snapshot, + i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; ret = fsck_write_inode(trans, &i->inode, i->snapshot); if (ret) @@ -1202,7 +1210,8 @@ static int overlapping_extents_found(struct btree_trans *trans, prt_printf(&buf, "\n overwriting %s extent", pos1.snapshot >= pos2.p.snapshot ? 
"first" : "second"); - if (fsck_err(c, "overlapping extents%s", buf.buf)) { + if (fsck_err(c, extent_overlapping, + "overlapping extents%s", buf.buf)) { struct btree_iter *old_iter = &iter1; struct disk_reservation res = { 0 }; @@ -1357,7 +1366,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto err; if (k.k->type != KEY_TYPE_whiteout) { - if (fsck_err_on(!i, c, + if (fsck_err_on(!i, c, extent_in_missing_inode, "extent in missing inode:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) @@ -1365,7 +1374,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, if (fsck_err_on(i && !S_ISREG(i->inode.bi_mode) && - !S_ISLNK(i->inode.bi_mode), c, + !S_ISLNK(i->inode.bi_mode), + c, extent_in_non_reg_inode, "extent in non regular inode mode %o:\n %s", i->inode.bi_mode, (printbuf_reset(&buf), @@ -1397,7 +1407,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, if (k.k->type != KEY_TYPE_whiteout) { if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && - !bkey_extent_is_reservation(k), c, + !bkey_extent_is_reservation(k), + c, extent_past_end_of_inode, "extent type past end of inode %llu:%u, i_size %llu\n %s", i->inode.bi_inum, i->snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1519,7 +1530,8 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) continue; } - if (fsck_err_on(i->inode.bi_nlink != i->count, c, + if (fsck_err_on(i->inode.bi_nlink != i->count, + c, inode_dir_wrong_nlink, "directory %llu:%u with wrong i_nlink: got %u, should be %llu", w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { i->inode.bi_nlink = i->count; @@ -1563,16 +1575,16 @@ static int check_dirent_target(struct btree_trans *trans, backpointer_exists = ret; ret = 0; - if (fsck_err_on(S_ISDIR(target->bi_mode) && - backpointer_exists, c, + 
if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists, + c, inode_dir_multiple_links, "directory %llu with multiple links", target->bi_inum)) { ret = __remove_dirent(trans, d.k->p); goto out; } - if (fsck_err_on(backpointer_exists && - !target->bi_nlink, c, + if (fsck_err_on(backpointer_exists && !target->bi_nlink, + c, inode_multiple_links_but_nlink_0, "inode %llu type %s has multiple links but i_nlink 0", target->bi_inum, bch2_d_types[d.v->d_type])) { target->bi_nlink++; @@ -1583,7 +1595,8 @@ static int check_dirent_target(struct btree_trans *trans, goto err; } - if (fsck_err_on(!backpointer_exists, c, + if (fsck_err_on(!backpointer_exists, + c, inode_wrong_backpointer, "inode %llu:%u has wrong backpointer:\n" "got %llu:%llu\n" "should be %llu:%llu", @@ -1601,7 +1614,8 @@ static int check_dirent_target(struct btree_trans *trans, } } - if (fsck_err_on(d.v->d_type != inode_d_type(target), c, + if (fsck_err_on(d.v->d_type != inode_d_type(target), + c, dirent_d_type_wrong, "incorrect d_type: got %s, should be %s:\n%s", bch2_d_type_str(d.v->d_type), bch2_d_type_str(inode_d_type(target)), @@ -1625,7 +1639,8 @@ static int check_dirent_target(struct btree_trans *trans, if (d.v->d_type == DT_SUBVOL && target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) && (c->sb.version < bcachefs_metadata_version_subvol_dirent || - fsck_err(c, "dirent has wrong d_parent_subvol field: got %u, should be %u", + fsck_err(c, dirent_d_parent_subvol_wrong, + "dirent has wrong d_parent_subvol field: got %u, should be %u", le32_to_cpu(d.v->d_parent_subvol), target->bi_parent_subvol))) { n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); @@ -1697,7 +1712,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); dir->first_this_inode = false; - if (fsck_err_on(!i, c, + if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, "dirent in nonexisting directory:\n%s", (printbuf_reset(&buf), 
bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1709,7 +1724,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (!i) goto out; - if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c, + if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), + c, dirent_in_non_dir_inode, "dirent in non directory inode type %s:\n%s", bch2_d_type_str(inode_d_type(&i->inode)), (printbuf_reset(&buf), @@ -1743,7 +1759,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, c, dirent_to_missing_subvol, "dirent points to missing subvolume %u", le32_to_cpu(d.v->d_child_subvol))) { ret = __remove_dirent(trans, d.k->p); @@ -1755,7 +1771,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, c, subvol_to_missing_root, "subvolume %u points to missing subvolume root %llu", target_subvol, target_inum)) { @@ -1764,7 +1780,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto err; } - if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c, + if (fsck_err_on(subvol_root.bi_subvol != target_subvol, + c, subvol_root_wrong_bi_subvol, "subvol root %llu has wrong bi_subvol field: got %u, should be %u", target_inum, subvol_root.bi_subvol, target_subvol)) { @@ -1783,7 +1800,8 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; - if (fsck_err_on(!target->inodes.nr, c, + if (fsck_err_on(!target->inodes.nr, + c, dirent_to_missing_inode, "dirent points to missing inode: (equiv %u)\n%s", equiv.snapshot, (printbuf_reset(&buf), @@ -1869,7 +1887,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); inode->first_this_inode = false; - if (fsck_err_on(!i, c, + if 
(fsck_err_on(!i, c, xattr_in_missing_inode, "xattr for missing inode %llu", k.k->p.inode)) return bch2_btree_delete_at(trans, iter, 0); @@ -1918,7 +1936,8 @@ static int check_root_trans(struct btree_trans *trans) if (ret && !bch2_err_matches(ret, ENOENT)) return ret; - if (mustfix_fsck_err_on(ret, c, "root subvol missing")) { + if (mustfix_fsck_err_on(ret, c, root_subvol_missing, + "root subvol missing")) { struct bkey_i_subvolume root_subvol; snapshot = U32_MAX; @@ -1944,8 +1963,10 @@ static int check_root_trans(struct btree_trans *trans) if (ret && !bch2_err_matches(ret, ENOENT)) return ret; - if (mustfix_fsck_err_on(ret, c, "root directory missing") || - mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), c, + if (mustfix_fsck_err_on(ret, c, root_dir_missing, + "root directory missing") || + mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), + c, root_inode_not_dir, "root inode not a directory")) { bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); @@ -2049,7 +2070,8 @@ static int check_path(struct btree_trans *trans, } if (bch2_err_matches(ret, ENOENT)) { - if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", + if (fsck_err(c, inode_unreachable, + "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", inode->bi_inum, snapshot, bch2_d_type_str(inode_d_type(inode)), inode->bi_nlink, @@ -2089,7 +2111,8 @@ static int check_path(struct btree_trans *trans, pr_err("%llu:%u", i->inum, i->snapshot); pr_err("%llu:%u", inode->bi_inum, snapshot); - if (!fsck_err(c, "directory structure loop")) + if (!fsck_err(c, dir_loop, + "directory structure loop")) return 0; ret = commit_do(trans, NULL, NULL, @@ -2349,7 +2372,8 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite link = &links->d[++*idx]; } - if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c, + if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, + c, inode_wrong_nlink, "inode %llu type %s has wrong i_nlink (%u, should be %u)", 
u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], bch2_inode_nlink_get(&u), link->count)) { diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 23fcd442c514..925d1b7f2887 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -398,104 +398,102 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bch_inode_unpacked unpacked; + int ret = 0; - if (k.k->p.inode) { - prt_printf(err, "nonzero k.p.inode"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->p.inode, c, err, + inode_pos_inode_nonzero, + "nonzero k.p.inode"); - if (k.k->p.offset < BLOCKDEV_INODE_MAX) { - prt_printf(err, "fs inode in blockdev range"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, + inode_pos_blockdev_range, + "fs inode in blockdev range"); - if (bch2_inode_unpack(k, &unpacked)) { - prt_printf(err, "invalid variable length fields"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, + inode_unpack_error, + "invalid variable length fields"); - if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) { - prt_printf(err, "invalid data checksum type (%u >= %u", - unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, + inode_checksum_type_invalid, + "invalid data checksum type (%u >= %u", + unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); - if (unpacked.bi_compression && - !bch2_compression_opt_valid(unpacked.bi_compression - 1)) { - prt_printf(err, "invalid compression opt %u", - unpacked.bi_compression - 1); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(unpacked.bi_compression && + !bch2_compression_opt_valid(unpacked.bi_compression - 
1), c, err, + inode_compression_type_invalid, + "invalid compression opt %u", unpacked.bi_compression - 1); - if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && - unpacked.bi_nlink != 0) { - prt_printf(err, "flagged as unlinked but bi_nlink != 0"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_UNLINKED) && + unpacked.bi_nlink != 0, c, err, + inode_unlinked_but_nlink_nonzero, + "flagged as unlinked but bi_nlink != 0"); - if (unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode)) { - prt_printf(err, "subvolume root but not a directory"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, + inode_subvol_root_but_not_dir, + "subvolume root but not a directory"); +fsck_err: + return ret; } -int bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); + int ret = 0; - if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } -int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); + int ret = 0; - if (INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + 
bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } -int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); + int ret = 0; - if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) { - prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)", - INODEv3_FIELDS_START(inode.v), - INODEv3_FIELDS_START_INITIAL, - bkey_val_u64s(inode.k)); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, + inode_v3_fields_start_bad, + "invalid fields_start (got %llu, min %u max %zu)", + INODEv3_FIELDS_START(inode.v), + INODEv3_FIELDS_START_INITIAL, + bkey_val_u64s(inode.k)); - if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { - prt_printf(err, "invalid str hash type (%llu >= %u)", - INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, + inode_str_hash_invalid, + "invalid str hash type (%llu >= %u)", + INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - return __bch2_inode_invalid(k, err); + ret = __bch2_inode_invalid(c, k, err); +fsck_err: + return ret; } static void __bch2_inode_unpacked_to_text(struct printbuf *out, @@ -612,16 +610,17 @@ int bch2_mark_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, enum 
bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->p.inode) { - prt_printf(err, "nonzero k.p.inode"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(k.k->p.inode, c, err, + inode_pos_inode_nonzero, + "nonzero k.p.inode"); +fsck_err: + return ret; } void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, @@ -1068,6 +1067,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) return 0; if (!fsck_err_on(c->sb.clean, c, + deleted_inode_but_clean, "filesystem marked as clean but have deleted inode %llu:%u", pos.offset, pos.snapshot)) return 0; @@ -1079,6 +1079,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; if (fsck_err_on(!bkey_is_inode(k.k), c, + deleted_inode_missing, "nonexistent inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; @@ -1088,11 +1089,13 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) goto err; if (fsck_err_on(S_ISDIR(inode.bi_mode), c, + deleted_inode_is_dir, "directory %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c, + deleted_inode_not_unlinked, "non-deleted inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 2781e3281583..74c62e6c16cc 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -8,11 +8,11 @@ enum bkey_invalid_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); -int 
bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -52,7 +52,7 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 9807e909cff4..65878542940d 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -140,7 +140,8 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, if (!dup->csum_good) goto replace; - fsck_err(c, "found duplicate but non identical journal entries (seq %llu)", + fsck_err(c, journal_entry_replicas_data_mismatch, + "found duplicate but non identical journal entries (seq %llu)", le64_to_cpu(j->seq)); i = dup; goto found; @@ -235,7 +236,7 @@ static void journal_entry_err_msg(struct printbuf *out, prt_str(out, ": "); } -#define journal_entry_err(c, version, jset, entry, msg, ...) \ +#define journal_entry_err(c, version, jset, entry, _err, msg, ...) 
\ ({ \ struct printbuf _buf = PRINTBUF; \ \ @@ -244,9 +245,10 @@ static void journal_entry_err_msg(struct printbuf *out, \ switch (flags & BKEY_INVALID_WRITE) { \ case READ: \ - mustfix_fsck_err(c, "%s", _buf.buf); \ + mustfix_fsck_err(c, _err, "%s", _buf.buf); \ break; \ case WRITE: \ + bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \ bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\ if (bch2_fs_inconsistent(c)) { \ ret = -BCH_ERR_fsck_errors_not_fixed; \ @@ -259,8 +261,8 @@ static void journal_entry_err_msg(struct printbuf *out, true; \ }) -#define journal_entry_err_on(cond, c, version, jset, entry, msg, ...) \ - ((cond) ? journal_entry_err(c, version, jset, entry, msg, ##__VA_ARGS__) : false) +#define journal_entry_err_on(cond, ...) \ + ((cond) ? journal_entry_err(__VA_ARGS__) : false) #define FSCK_DELETED_KEY 5 @@ -277,7 +279,10 @@ static int journal_validate_key(struct bch_fs *c, struct printbuf buf = PRINTBUF; int ret = 0; - if (journal_entry_err_on(!k->k.u64s, c, version, jset, entry, "k->u64s 0")) { + if (journal_entry_err_on(!k->k.u64s, + c, version, jset, entry, + journal_entry_bkey_u64s_0, + "k->u64s 0")) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(vstruct_next(entry), next); return FSCK_DELETED_KEY; @@ -286,6 +291,7 @@ static int journal_validate_key(struct bch_fs *c, if (journal_entry_err_on((void *) bkey_next(k) > (void *) vstruct_next(entry), c, version, jset, entry, + journal_entry_bkey_past_end, "extends past end of journal entry")) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(vstruct_next(entry), next); @@ -294,6 +300,7 @@ static int journal_validate_key(struct bch_fs *c, if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, c, version, jset, entry, + journal_entry_bkey_bad_format, "bad format %u", k->k.format)) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -317,7 +324,8 @@ static int 
journal_validate_key(struct bch_fs *c, bch2_bkey_invalid(c, bkey_i_to_s_c(k), __btree_node_type(level, btree_id), write, &buf); - mustfix_fsck_err(c, "%s", buf.buf); + mustfix_fsck_err(c, journal_entry_bkey_invalid, + "%s", buf.buf); le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -387,6 +395,7 @@ static int journal_entry_btree_root_validate(struct bch_fs *c, if (journal_entry_err_on(!entry->u64s || le16_to_cpu(entry->u64s) != k->k.u64s, c, version, jset, entry, + journal_entry_btree_root_bad_size, "invalid btree root journal entry: wrong number of keys")) { void *next = vstruct_next(entry); /* @@ -436,6 +445,7 @@ static int journal_entry_blacklist_validate(struct bch_fs *c, if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c, version, jset, entry, + journal_entry_blacklist_bad_size, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); } @@ -463,6 +473,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c, version, jset, entry, + journal_entry_blacklist_v2_bad_size, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); goto out; @@ -473,6 +484,7 @@ static int journal_entry_blacklist_v2_validate(struct bch_fs *c, if (journal_entry_err_on(le64_to_cpu(bl_entry->start) > le64_to_cpu(bl_entry->end), c, version, jset, entry, + journal_entry_blacklist_v2_start_past_end, "invalid journal seq blacklist entry: start > end")) { journal_entry_null_range(entry, vstruct_next(entry)); } @@ -505,6 +517,7 @@ static int journal_entry_usage_validate(struct bch_fs *c, if (journal_entry_err_on(bytes < sizeof(*u), c, version, jset, entry, + journal_entry_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; @@ -539,6 +552,7 @@ static int 
journal_entry_data_usage_validate(struct bch_fs *c, if (journal_entry_err_on(bytes < sizeof(*u) || bytes < sizeof(*u) + u->r.nr_devs, c, version, jset, entry, + journal_entry_data_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; @@ -570,13 +584,17 @@ static int journal_entry_clock_validate(struct bch_fs *c, int ret = 0; if (journal_entry_err_on(bytes != sizeof(*clock), - c, version, jset, entry, "bad size")) { + c, version, jset, entry, + journal_entry_clock_bad_size, + "bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } if (journal_entry_err_on(clock->rw > 1, - c, version, jset, entry, "bad rw")) { + c, version, jset, entry, + journal_entry_clock_bad_rw, + "bad rw")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } @@ -608,7 +626,9 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c, int ret = 0; if (journal_entry_err_on(bytes < expected, - c, version, jset, entry, "bad size (%u < %u)", + c, version, jset, entry, + journal_entry_dev_usage_bad_size, + "bad size (%u < %u)", bytes, expected)) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; @@ -617,13 +637,17 @@ static int journal_entry_dev_usage_validate(struct bch_fs *c, dev = le32_to_cpu(u->dev); if (journal_entry_err_on(!bch2_dev_exists2(c, dev), - c, version, jset, entry, "bad dev")) { + c, version, jset, entry, + journal_entry_dev_usage_bad_dev, + "bad dev")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } if (journal_entry_err_on(u->pad, - c, version, jset, entry, "bad pad")) { + c, version, jset, entry, + journal_entry_dev_usage_bad_pad, + "bad pad")) { journal_entry_null_range(entry, vstruct_next(entry)); return ret; } @@ -738,7 +762,8 @@ static int jset_validate_entries(struct bch_fs *c, struct jset *jset, vstruct_for_each(jset, entry) { if (journal_entry_err_on(vstruct_next(entry) > vstruct_last(jset), - c, version, jset, 
entry, + c, version, jset, entry, + journal_entry_past_jset_end, "journal entry extends past end of jset")) { jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); break; @@ -767,6 +792,7 @@ static int jset_validate(struct bch_fs *c, version = le32_to_cpu(jset->version); if (journal_entry_err_on(!bch2_version_compatible(version), c, version, jset, NULL, + jset_unsupported_version, "%s sector %llu seq %llu: incompatible journal entry version %u.%u", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -777,7 +803,8 @@ static int jset_validate(struct bch_fs *c, } if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), - c, version, jset, NULL, + c, version, jset, NULL, + jset_unknown_csum, "%s sector %llu seq %llu: journal entry with unknown csum type %llu", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -788,6 +815,7 @@ static int jset_validate(struct bch_fs *c, if (journal_entry_err_on(!JSET_NO_FLUSH(jset) && le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c, version, jset, NULL, + jset_last_seq_newer_than_seq, "invalid journal entry: last_seq > seq (%llu > %llu)", le64_to_cpu(jset->last_seq), le64_to_cpu(jset->seq))) { @@ -816,7 +844,8 @@ static int jset_validate_early(struct bch_fs *c, version = le32_to_cpu(jset->version); if (journal_entry_err_on(!bch2_version_compatible(version), - c, version, jset, NULL, + c, version, jset, NULL, + jset_unsupported_version, "%s sector %llu seq %llu: unknown journal entry version %u.%u", ca ? ca->name : c->name, sector, le64_to_cpu(jset->seq), @@ -831,7 +860,8 @@ static int jset_validate_early(struct bch_fs *c, return JOURNAL_ENTRY_REREAD; if (journal_entry_err_on(bytes > bucket_sectors_left << 9, - c, version, jset, NULL, + c, version, jset, NULL, + jset_past_bucket_end, "%s sector %llu seq %llu: journal entry too big (%zu bytes)", ca ? 
ca->name : c->name, sector, le64_to_cpu(jset->seq), bytes)) @@ -1173,6 +1203,7 @@ int bch2_journal_read(struct bch_fs *c, if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq), c, le32_to_cpu(i->j.version), &i->j, NULL, + jset_last_seq_newer_than_seq, "invalid journal entry: last_seq > seq (%llu > %llu)", le64_to_cpu(i->j.last_seq), le64_to_cpu(i->j.seq))) @@ -1189,7 +1220,8 @@ int bch2_journal_read(struct bch_fs *c, } if (!*last_seq) { - fsck_err(c, "journal read done, but no entries found after dropping non-flushes"); + fsck_err(c, dirty_but_no_journal_entries_post_drop_nonflushes, + "journal read done, but no entries found after dropping non-flushes"); return 0; } @@ -1215,6 +1247,7 @@ int bch2_journal_read(struct bch_fs *c, if (bch2_journal_seq_is_blacklisted(c, seq, true)) { fsck_err_on(!JSET_NO_FLUSH(&i->j), c, + jset_seq_blacklisted, "found blacklisted journal entry %llu", seq); i->ignore = true; } @@ -1255,7 +1288,8 @@ int bch2_journal_read(struct bch_fs *c, bch2_journal_ptrs_to_text(&buf2, c, i); missing_end = seq - 1; - fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)\n" + fsck_err(c, journal_entries_missing, + "journal entries %llu-%llu missing! 
(replaying %llu-%llu)\n" " prev at %s\n" " next at %s", missing_start, missing_end, @@ -1310,7 +1344,8 @@ int bch2_journal_read(struct bch_fs *c, if (!degraded && !bch2_replicas_marked(c, &replicas.e) && (le64_to_cpu(i->j.seq) == *last_seq || - fsck_err(c, "superblock not marked as containing replicas for journal entry %llu\n %s", + fsck_err(c, journal_entry_replicas_not_marked, + "superblock not marked as containing replicas for journal entry %llu\n %s", le64_to_cpu(i->j.seq), buf.buf))) { ret = bch2_mark_replicas(c, &replicas.e); if (ret) diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 215a653322f3..a5cc0ed195d6 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,17 +10,17 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (!lru_pos_time(k.k->p)) { - prt_printf(err, "lru entry at time=0"); - return -BCH_ERR_invalid_bkey; - - } + int ret = 0; - return 0; + bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, + lru_entry_at_time_0, + "lru entry at time=0"); +fsck_err: + return ret; } void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c, @@ -95,6 +95,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, int ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c, + lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) return bch2_btree_delete_at(trans, lru_iter, 0); @@ -125,7 +126,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, } if (c->opts.reconstruct_alloc || - fsck_err(c, "incorrect lru entry: lru %s time %llu\n" + fsck_err(c, lru_entry_bad, + "incorrect lru entry: lru %s time %llu\n" " %s\n" " for %s", bch2_lru_types[type], diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index be66bf9ad809..429dca816df5 100644 --- a/fs/bcachefs/lru.h +++ 
b/fs/bcachefs/lru.h @@ -48,7 +48,7 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index cb68ae44d597..a54647c36b85 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,17 +59,18 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (k.k->p.inode >= QTYP_NR) { - prt_printf(err, "invalid quota type (%llu >= %u)", - k.k->p.inode, QTYP_NR); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, + quota_type_invalid, + "invalid quota type (%llu >= %u)", + k.k->p.inode, QTYP_NR); +fsck_err: + return ret; } void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 2f463874a362..884f601f41c4 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,7 +8,7 @@ enum bkey_invalid_flags; extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -int bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 02025099c38f..f73338f37bf1 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -365,8 +365,10 @@ static int read_btree_roots(struct bch_fs *c) } if (r->error) { - __fsck_err(c, btree_id_is_alloc(i) + __fsck_err(c, + 
btree_id_is_alloc(i) ? FSCK_CAN_IGNORE : 0, + btree_root_bkey_invalid, "invalid btree root %s", bch2_btree_id_str(i)); if (i == BTREE_ID_alloc) @@ -376,6 +378,7 @@ static int read_btree_roots(struct bch_fs *c) ret = bch2_btree_root_read(c, i, &r->key, r->level); if (ret) { fsck_err(c, + btree_root_read_error, "error reading btree root %s", bch2_btree_id_str(i)); if (btree_id_is_alloc(i)) @@ -714,6 +717,7 @@ int bch2_fs_recovery(struct bch_fs *c) if (mustfix_fsck_err_on(c->sb.clean && last_journal_entry && !journal_entry_empty(last_journal_entry), c, + clean_but_journal_not_empty, "filesystem marked clean but journal not empty")) { c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); @@ -721,7 +725,9 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!last_journal_entry) { - fsck_err_on(!c->sb.clean, c, "no journal entries found"); + fsck_err_on(!c->sb.clean, c, + dirty_but_no_journal_entries, + "no journal entries found"); if (clean) goto use_clean; diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 507100b38e29..6e1bfe9feb59 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -28,7 +28,7 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -75,7 +75,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r /* indirect extents */ -int bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { @@ -126,7 +126,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans, /* indirect inline data */ -int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_indirect_inline_data_invalid(struct 
bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index fe52538efb52..8ccf3f9c4939 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,7 +4,7 @@ enum bkey_invalid_flags; -int bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -19,7 +19,7 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -35,7 +35,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 61203d7c8d36..9b6cc86d264a 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -82,6 +82,7 @@ int bch2_verify_superblock_clean(struct bch_fs *c, int ret = 0; if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, + sb_clean_journal_seq_mismatch, "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", le64_to_cpu(clean->journal_seq), le64_to_cpu(j->seq))) { @@ -119,6 +120,7 @@ int bch2_verify_superblock_clean(struct bch_fs *c, k1->k.u64s != k2->k.u64s || memcmp(k1, k2, bkey_bytes(&k1->k)) || l1 != l2, c, + sb_clean_btree_root_mismatch, "superblock btree root 
%u doesn't match journal after clean shutdown\n" "sb: l=%u %s\n" "journal: l=%u %s\n", i, @@ -140,6 +142,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean); if (fsck_err_on(!sb_clean, c, + sb_clean_missing, "superblock marked clean but clean section not present")) { SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->sb.clean = false; diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h index 7f8172821240..5a09a53966be 100644 --- a/fs/bcachefs/sb-errors.h +++ b/fs/bcachefs/sb-errors.h @@ -4,7 +4,251 @@ #include "sb-errors_types.h" -#define BCH_SB_ERRS() +#define BCH_SB_ERRS() \ + x(clean_but_journal_not_empty, 0) \ + x(dirty_but_no_journal_entries, 1) \ + x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ + x(sb_clean_journal_seq_mismatch, 3) \ + x(sb_clean_btree_root_mismatch, 4) \ + x(sb_clean_missing, 5) \ + x(jset_unsupported_version, 6) \ + x(jset_unknown_csum, 7) \ + x(jset_last_seq_newer_than_seq, 8) \ + x(jset_past_bucket_end, 9) \ + x(jset_seq_blacklisted, 10) \ + x(journal_entries_missing, 11) \ + x(journal_entry_replicas_not_marked, 12) \ + x(journal_entry_past_jset_end, 13) \ + x(journal_entry_replicas_data_mismatch, 14) \ + x(journal_entry_bkey_u64s_0, 15) \ + x(journal_entry_bkey_past_end, 16) \ + x(journal_entry_bkey_bad_format, 17) \ + x(journal_entry_bkey_invalid, 18) \ + x(journal_entry_btree_root_bad_size, 19) \ + x(journal_entry_blacklist_bad_size, 20) \ + x(journal_entry_blacklist_v2_bad_size, 21) \ + x(journal_entry_blacklist_v2_start_past_end, 22) \ + x(journal_entry_usage_bad_size, 23) \ + x(journal_entry_data_usage_bad_size, 24) \ + x(journal_entry_clock_bad_size, 25) \ + x(journal_entry_clock_bad_rw, 26) \ + x(journal_entry_dev_usage_bad_size, 27) \ + x(journal_entry_dev_usage_bad_dev, 28) \ + x(journal_entry_dev_usage_bad_pad, 29) \ + x(btree_node_unreadable, 30) \ + x(btree_node_fault_injected, 31) \ + x(btree_node_bad_magic, 32) \ + 
x(btree_node_bad_seq, 33) \ + x(btree_node_unsupported_version, 34) \ + x(btree_node_bset_older_than_sb_min, 35) \ + x(btree_node_bset_newer_than_sb, 36) \ + x(btree_node_data_missing, 37) \ + x(btree_node_bset_after_end, 38) \ + x(btree_node_replicas_sectors_written_mismatch, 39) \ + x(btree_node_replicas_data_mismatch, 40) \ + x(bset_unknown_csum, 41) \ + x(bset_bad_csum, 42) \ + x(bset_past_end_of_btree_node, 43) \ + x(bset_wrong_sector_offset, 44) \ + x(bset_empty, 45) \ + x(bset_bad_seq, 46) \ + x(bset_blacklisted_journal_seq, 47) \ + x(first_bset_blacklisted_journal_seq, 48) \ + x(btree_node_bad_btree, 49) \ + x(btree_node_bad_level, 50) \ + x(btree_node_bad_min_key, 51) \ + x(btree_node_bad_max_key, 52) \ + x(btree_node_bad_format, 53) \ + x(btree_node_bkey_past_bset_end, 54) \ + x(btree_node_bkey_bad_format, 55) \ + x(btree_node_bad_bkey, 56) \ + x(btree_node_bkey_out_of_order, 57) \ + x(btree_root_bkey_invalid, 58) \ + x(btree_root_read_error, 59) \ + x(btree_root_bad_min_key, 60) \ + x(btree_root_bad_max_key, 61) \ + x(btree_node_read_error, 62) \ + x(btree_node_topology_bad_min_key, 63) \ + x(btree_node_topology_bad_max_key, 64) \ + x(btree_node_topology_overwritten_by_prev_node, 65) \ + x(btree_node_topology_overwritten_by_next_node, 66) \ + x(btree_node_topology_interior_node_empty, 67) \ + x(fs_usage_hidden_wrong, 68) \ + x(fs_usage_btree_wrong, 69) \ + x(fs_usage_data_wrong, 70) \ + x(fs_usage_cached_wrong, 71) \ + x(fs_usage_reserved_wrong, 72) \ + x(fs_usage_persistent_reserved_wrong, 73) \ + x(fs_usage_nr_inodes_wrong, 74) \ + x(fs_usage_replicas_wrong, 75) \ + x(dev_usage_buckets_wrong, 76) \ + x(dev_usage_sectors_wrong, 77) \ + x(dev_usage_fragmented_wrong, 78) \ + x(dev_usage_buckets_ec_wrong, 79) \ + x(bkey_version_in_future, 80) \ + x(bkey_u64s_too_small, 81) \ + x(bkey_invalid_type_for_btree, 82) \ + x(bkey_extent_size_zero, 83) \ + x(bkey_extent_size_greater_than_offset, 84) \ + x(bkey_size_nonzero, 85) \ + x(bkey_snapshot_nonzero, 86) \ +
x(bkey_snapshot_zero, 87) \ + x(bkey_at_pos_max, 88) \ + x(bkey_before_start_of_btree_node, 89) \ + x(bkey_after_end_of_btree_node, 90) \ + x(bkey_val_size_nonzero, 91) \ + x(bkey_val_size_too_small, 92) \ + x(alloc_v1_val_size_bad, 93) \ + x(alloc_v2_unpack_error, 94) \ + x(alloc_v3_unpack_error, 95) \ + x(alloc_v4_val_size_bad, 96) \ + x(alloc_v4_backpointers_start_bad, 97) \ + x(alloc_key_data_type_bad, 98) \ + x(alloc_key_empty_but_have_data, 99) \ + x(alloc_key_dirty_sectors_0, 100) \ + x(alloc_key_data_type_inconsistency, 101) \ + x(alloc_key_to_missing_dev_bucket, 102) \ + x(alloc_key_cached_inconsistency, 103) \ + x(alloc_key_cached_but_read_time_zero, 104) \ + x(alloc_key_to_missing_lru_entry, 105) \ + x(alloc_key_data_type_wrong, 106) \ + x(alloc_key_gen_wrong, 107) \ + x(alloc_key_dirty_sectors_wrong, 108) \ + x(alloc_key_cached_sectors_wrong, 109) \ + x(alloc_key_stripe_wrong, 110) \ + x(alloc_key_stripe_redundancy_wrong, 111) \ + x(bucket_sector_count_overflow, 112) \ + x(bucket_metadata_type_mismatch, 113) \ + x(need_discard_key_wrong, 114) \ + x(freespace_key_wrong, 115) \ + x(freespace_hole_missing, 116) \ + x(bucket_gens_val_size_bad, 117) \ + x(bucket_gens_key_wrong, 118) \ + x(bucket_gens_hole_wrong, 119) \ + x(bucket_gens_to_invalid_dev, 120) \ + x(bucket_gens_to_invalid_buckets, 121) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122) \ + x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ + x(need_discard_freespace_key_bad, 124) \ + x(backpointer_pos_wrong, 125) \ + x(backpointer_to_missing_device, 126) \ + x(backpointer_to_missing_alloc, 127) \ + x(backpointer_to_missing_ptr, 128) \ + x(lru_entry_at_time_0, 129) \ + x(lru_entry_to_invalid_bucket, 130) \ + x(lru_entry_bad, 131) \ + x(btree_ptr_val_too_big, 132) \ + x(btree_ptr_v2_val_too_big, 133) \ + x(btree_ptr_has_non_ptr, 134) \ + x(extent_ptrs_invalid_entry, 135) \ + x(extent_ptrs_no_ptrs, 136) \ + x(extent_ptrs_too_many_ptrs, 137) \ + x(extent_ptrs_redundant_crc, 138) \ + 
x(extent_ptrs_redundant_stripe, 139) \ + x(extent_ptrs_unwritten, 140) \ + x(extent_ptrs_written_and_unwritten, 141) \ + x(ptr_to_invalid_device, 142) \ + x(ptr_to_duplicate_device, 143) \ + x(ptr_after_last_bucket, 144) \ + x(ptr_before_first_bucket, 145) \ + x(ptr_spans_multiple_buckets, 146) \ + x(ptr_to_missing_backpointer, 147) \ + x(ptr_to_missing_alloc_key, 148) \ + x(ptr_to_missing_replicas_entry, 149) \ + x(ptr_to_missing_stripe, 150) \ + x(ptr_to_incorrect_stripe, 151) \ + x(ptr_gen_newer_than_bucket_gen, 152) \ + x(ptr_too_stale, 153) \ + x(stale_dirty_ptr, 154) \ + x(ptr_bucket_data_type_mismatch, 155) \ + x(ptr_cached_and_erasure_coded, 156) \ + x(ptr_crc_uncompressed_size_too_small, 157) \ + x(ptr_crc_csum_type_unknown, 158) \ + x(ptr_crc_compression_type_unknown, 159) \ + x(ptr_crc_redundant, 160) \ + x(ptr_crc_uncompressed_size_too_big, 161) \ + x(ptr_crc_nonce_mismatch, 162) \ + x(ptr_stripe_redundant, 163) \ + x(reservation_key_nr_replicas_invalid, 164) \ + x(reflink_v_refcount_wrong, 165) \ + x(reflink_p_to_missing_reflink_v, 166) \ + x(stripe_pos_bad, 167) \ + x(stripe_val_size_bad, 168) \ + x(stripe_sector_count_wrong, 169) \ + x(snapshot_tree_pos_bad, 170) \ + x(snapshot_tree_to_missing_snapshot, 171) \ + x(snapshot_tree_to_missing_subvol, 172) \ + x(snapshot_tree_to_wrong_subvol, 173) \ + x(snapshot_tree_to_snapshot_subvol, 174) \ + x(snapshot_pos_bad, 175) \ + x(snapshot_parent_bad, 176) \ + x(snapshot_children_not_normalized, 177) \ + x(snapshot_child_duplicate, 178) \ + x(snapshot_child_bad, 179) \ + x(snapshot_skiplist_not_normalized, 180) \ + x(snapshot_skiplist_bad, 181) \ + x(snapshot_should_not_have_subvol, 182) \ + x(snapshot_to_bad_snapshot_tree, 183) \ + x(snapshot_bad_depth, 184) \ + x(snapshot_bad_skiplist, 185) \ + x(subvol_pos_bad, 186) \ + x(subvol_not_master_and_not_snapshot, 187) \ + x(subvol_to_missing_root, 188) \ + x(subvol_root_wrong_bi_subvol, 189) \ + x(bkey_in_missing_snapshot, 190) \ + x(inode_pos_inode_nonzero, 191) 
\ + x(inode_pos_blockdev_range, 192) \ + x(inode_unpack_error, 193) \ + x(inode_str_hash_invalid, 194) \ + x(inode_v3_fields_start_bad, 195) \ + x(inode_snapshot_mismatch, 196) \ + x(inode_unlinked_but_clean, 197) \ + x(inode_unlinked_but_nlink_nonzero, 198) \ + x(inode_checksum_type_invalid, 199) \ + x(inode_compression_type_invalid, 200) \ + x(inode_subvol_root_but_not_dir, 201) \ + x(inode_i_size_dirty_but_clean, 202) \ + x(inode_i_sectors_dirty_but_clean, 203) \ + x(inode_i_sectors_wrong, 204) \ + x(inode_dir_wrong_nlink, 205) \ + x(inode_dir_multiple_links, 206) \ + x(inode_multiple_links_but_nlink_0, 207) \ + x(inode_wrong_backpointer, 208) \ + x(inode_wrong_nlink, 209) \ + x(inode_unreachable, 210) \ + x(deleted_inode_but_clean, 211) \ + x(deleted_inode_missing, 212) \ + x(deleted_inode_is_dir, 213) \ + x(deleted_inode_not_unlinked, 214) \ + x(extent_overlapping, 215) \ + x(extent_in_missing_inode, 216) \ + x(extent_in_non_reg_inode, 217) \ + x(extent_past_end_of_inode, 218) \ + x(dirent_empty_name, 219) \ + x(dirent_val_too_big, 220) \ + x(dirent_name_too_long, 221) \ + x(dirent_name_embedded_nul, 222) \ + x(dirent_name_dot_or_dotdot, 223) \ + x(dirent_name_has_slash, 224) \ + x(dirent_d_type_wrong, 225) \ + x(dirent_d_parent_subvol_wrong, 226) \ + x(dirent_in_missing_dir_inode, 227) \ + x(dirent_in_non_dir_inode, 228) \ + x(dirent_to_missing_inode, 229) \ + x(dirent_to_missing_subvol, 230) \ + x(dirent_to_itself, 231) \ + x(quota_type_invalid, 232) \ + x(xattr_val_size_too_small, 233) \ + x(xattr_val_size_too_big, 234) \ + x(xattr_invalid_type, 235) \ + x(xattr_name_invalid_chars, 236) \ + x(xattr_in_missing_inode, 237) \ + x(root_subvol_missing, 238) \ + x(root_dir_missing, 239) \ + x(root_inode_not_dir, 240) \ + x(dir_loop, 241) \ + x(hash_table_key_duplicate, 242) \ + x(hash_table_key_wrong_offset, 243) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 
e70adfcbd953..a3fecc785001 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -30,17 +30,18 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1))) { - prt_printf(err, "bad pos"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || + bkey_lt(k.k->p, POS(0, 1)), c, err, + snapshot_tree_pos_bad, + "bad pos"); +fsck_err: + return ret; } int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, @@ -202,67 +203,60 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { struct bkey_s_c_snapshot s; u32 i, id; + int ret = 0; - if (bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1))) { - prt_printf(err, "bad pos"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || + bkey_lt(k.k->p, POS(0, 1)), c, err, + snapshot_pos_bad, + "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - if (id && id <= k.k->p.offset) { - prt_printf(err, "bad parent node (%u <= %llu)", - id, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, + snapshot_parent_bad, + "bad parent node (%u <= %llu)", + id, k.k->p.offset); - if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) { - prt_printf(err, "children not normalized"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < 
le32_to_cpu(s.v->children[1]), c, err, + snapshot_children_not_normalized, + "children not normalized"); - if (s.v->children[0] && - s.v->children[0] == s.v->children[1]) { - prt_printf(err, "duplicate child nodes"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, + snapshot_child_duplicate, + "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - if (id >= k.k->p.offset) { - prt_printf(err, "bad child node (%u >= %llu)", - id, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id >= k.k->p.offset, c, err, + snapshot_child_bad, + "bad child node (%u >= %llu)", + id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { - if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) { - prt_printf(err, "skiplist not normalized"); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, + snapshot_skiplist_not_normalized, + "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - if (id && id < le32_to_cpu(s.v->parent)) { - prt_printf(err, "bad skiplist node %u", id); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, + snapshot_skiplist_bad, + "bad skiplist node %u", id); } } - - return 0; +fsck_err: + return ret; } static void __set_is_ancestor_bitmap(struct bch_fs *c, u32 id) @@ -529,7 +523,7 @@ static int check_snapshot_tree(struct btree_trans *trans, if (fsck_err_on(ret || root_id != bch2_snapshot_root(c, root_id) || st.k->p.offset != le32_to_cpu(s.tree), - c, + c, snapshot_tree_to_missing_snapshot, "snapshot tree points to missing/incorrect snapshot:\n %s", (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { ret = 
bch2_btree_delete_at(trans, iter, 0); @@ -541,17 +535,20 @@ static int check_snapshot_tree(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) goto err; - if (fsck_err_on(ret, c, + if (fsck_err_on(ret, + c, snapshot_tree_to_missing_subvol, "snapshot tree points to missing subvolume:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || fsck_err_on(!bch2_snapshot_is_ancestor_early(c, le32_to_cpu(subvol.snapshot), - root_id), c, + root_id), + c, snapshot_tree_to_wrong_subvol, "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c, + fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), + c, snapshot_tree_to_snapshot_subvol, "snapshot tree points to snapshot subvolume:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { @@ -787,7 +784,9 @@ static int check_snapshot(struct btree_trans *trans, goto err; } } else { - if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s", + if (fsck_err_on(s.subvol, + c, snapshot_should_not_have_subvol, + "snapshot should not point to subvol:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -803,7 +802,8 @@ static int check_snapshot(struct btree_trans *trans, if (ret < 0) goto err; - if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s", + if (fsck_err_on(!ret, c, snapshot_to_bad_snapshot_tree, + "snapshot points to missing/incorrect tree:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { ret = snapshot_tree_ptr_repair(trans, iter, k, &s); if (ret) @@ -815,7 +815,8 @@ static int check_snapshot(struct btree_trans *trans, if (le32_to_cpu(s.depth) != real_depth && (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, "snapshot with incorrect 
depth field, should be %u:\n %s", + fsck_err(c, snapshot_bad_depth, + "snapshot with incorrect depth field, should be %u:\n %s", real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); @@ -832,7 +833,8 @@ static int check_snapshot(struct btree_trans *trans, if (!ret && (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, "snapshot with bad skiplist field:\n %s", + fsck_err(c, snapshot_bad_skiplist, + "snapshot with bad skiplist field:\n %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 01f006cac831..f09a22f44239 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,7 +5,7 @@ enum bkey_invalid_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ @@ -19,7 +19,7 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s_c, unsigned); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 73ba22c219a1..fccd25aa3242 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -62,7 +62,8 @@ static int 
check_subvol(struct btree_trans *trans, if (ret) return ret; - if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c, + if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, + c, subvol_not_master_and_not_snapshot, "subvolume %llu is not set as snapshot but is not master subvolume", k.k->p.offset)) { struct bkey_i_subvolume *s = @@ -97,16 +98,17 @@ int bch2_check_subvols(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - if (bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX)) { - prt_printf(err, "invalid pos"); - return -BCH_ERR_invalid_bkey; - } + int ret = 0; - return 0; + bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || + bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, + subvol_pos_bad, + "invalid pos"); +fsck_err: + return ret; } void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index bb14f92e8687..a1003d30ab0a 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -9,7 +9,7 @@ enum bkey_invalid_flags; int bch2_check_subvols(struct bch_fs *); -int bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 74b41f567ab8..a39ff0c296ec 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,46 +70,38 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k, +int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, enum bkey_invalid_flags flags, struct printbuf *err) { - const struct xattr_handler 
*handler; struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); + unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len)); + int ret = 0; - if (bkey_val_u64s(k.k) < - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len))) { - prt_printf(err, "value too small (%zu < %u)", - bkey_val_u64s(k.k), - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len))); - return -BCH_ERR_invalid_bkey; - } + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, + xattr_val_size_too_small, + "value too small (%zu < %u)", + bkey_val_u64s(k.k), val_u64s); /* XXX why +4 ? */ - if (bkey_val_u64s(k.k) > - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len) + 4)) { - prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len) + 4)); - return -BCH_ERR_invalid_bkey; - } - - handler = bch2_xattr_type_to_handler(xattr.v->x_type); - if (!handler) { - prt_printf(err, "invalid type (%u)", xattr.v->x_type); - return -BCH_ERR_invalid_bkey; - } - - if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) { - prt_printf(err, "xattr name has invalid characters"); - return -BCH_ERR_invalid_bkey; - } - - return 0; + val_u64s = xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len) + 4); + + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, + xattr_val_size_too_big, + "value too big (%zu > %u)", + bkey_val_u64s(k.k), val_u64s); + + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, + xattr_invalid_type, + "invalid type (%u)", xattr.v->x_type); + + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, + xattr_name_invalid_chars, + "xattr name has invalid characters"); +fsck_err: + return ret; } void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index f5a52e3a6016..1337f31a5c49 100644 --- a/fs/bcachefs/xattr.h +++ 
b/fs/bcachefs/xattr.h @@ -6,7 +6,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c, +int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -- cgit v1.2.3 From 85103d15ca3fe3b987f912873cb4f91b6f557c6c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 26 Oct 2023 23:02:42 -0400 Subject: bcachefs: Fix error path in bch2_replicas_gc_end() We were dropping a lock we hadn't taken when entering with an error. Signed-off-by: Kent Overstreet --- fs/bcachefs/replicas.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index cef2a0447b86..1c3ae13bfced 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -462,18 +462,13 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) { lockdep_assert_held(&c->replicas_gc_lock); - if (ret) - goto err; - mutex_lock(&c->sb_lock); percpu_down_write(&c->mark_lock); - ret = bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); - if (ret) - goto err; + ret = ret ?: + bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?: + replicas_table_update(c, &c->replicas_gc); - ret = replicas_table_update(c, &c->replicas_gc); -err: kfree(c->replicas_gc.entries); c->replicas_gc.entries = NULL; @@ -579,12 +574,9 @@ retry: bch2_cpu_replicas_sort(&new); - ret = bch2_cpu_replicas_to_sb_replicas(c, &new); - if (ret) - goto err; + ret = bch2_cpu_replicas_to_sb_replicas(c, &new) ?: + replicas_table_update(c, &new); - ret = replicas_table_update(c, &new); -err: kfree(new.entries); percpu_up_write(&c->mark_lock); -- cgit v1.2.3 From 2e7acdfbcad8b60eeef29d3beb3eb9a7085e3768 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 27 Oct 2023 13:53:07 -0400 Subject: bcachefs: Fix deleted inodes btree in snapshot deletion Signed-off-by: Kent 
Overstreet --- fs/bcachefs/snapshot.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index a3fecc785001..e9af77b384c7 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1437,6 +1437,15 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!btree_type_has_snapshots(id)) continue; + /* + * deleted inodes btree is maintained by a trigger on the inodes + * btree - no work for us to do here, and it's not safe to scan + * it because we'll see out of date keys due to the btree write + * buffer: + */ + if (id == BTREE_ID_deleted_inodes) + continue; + ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, -- cgit v1.2.3 From be9e782df3cb557715630a61dc79d9f966737859 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 27 Oct 2023 15:23:46 -0400 Subject: bcachefs: Don't downgrade locks on transaction restart We should only be downgrading locks on success - otherwise, our transaction restarts won't be getting the correct locks and we'll livelock. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 3 ++- fs/bcachefs/btree_key_cache.c | 2 +- fs/bcachefs/btree_locking.c | 38 +++++++++++++++++++++++------- fs/bcachefs/btree_locking.h | 18 ++++++++++---- fs/bcachefs/btree_trans_commit.c | 9 +++---- fs/bcachefs/btree_types.h | 2 ++ fs/bcachefs/btree_update_interior.c | 2 +- fs/bcachefs/data_update.c | 12 +--------- fs/bcachefs/trace.h | 47 +++++++++++++++++++++++++++++++++---- 9 files changed, 96 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 3b629420655a..0622f729411f 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1523,6 +1523,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans, path->ref = 0; path->intent_ref = 0; path->nodes_locked = 0; + path->alloc_seq++; btree_path_list_add(trans, pos, path); trans->paths_sorted = false; @@ -1598,7 +1599,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans, locks_want = min(locks_want, BTREE_MAX_DEPTH); if (locks_want > path->locks_want) - bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want); + bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL); return path; } diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 634ffdcb55f9..3304bff7d464 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -509,7 +509,7 @@ fill: * path->uptodate yet: */ if (!path->locks_want && - !__bch2_btree_path_upgrade(trans, path, 1)) { + !__bch2_btree_path_upgrade(trans, path, 1, NULL)) { trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_); ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade); goto err; diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 40c8ed8f7bf1..bc45cd2a34a4 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -431,7 +431,8 @@ void 
bch2_btree_node_lock_write_nofail(struct btree_trans *trans, static inline bool btree_path_get_locks(struct btree_trans *trans, struct btree_path *path, - bool upgrade) + bool upgrade, + struct get_locks_fail *f) { unsigned l = path->level; int fail_idx = -1; @@ -442,8 +443,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans, if (!(upgrade ? bch2_btree_node_upgrade(trans, path, l) - : bch2_btree_node_relock(trans, path, l))) - fail_idx = l; + : bch2_btree_node_relock(trans, path, l))) { + fail_idx = l; + + if (f) { + f->l = l; + f->b = path->l[l].b; + } + } l++; } while (l < path->locks_want); @@ -584,7 +591,9 @@ __flatten bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path, unsigned long trace_ip) { - return btree_path_get_locks(trans, path, false); + struct get_locks_fail f; + + return btree_path_get_locks(trans, path, false, &f); } int __bch2_btree_path_relock(struct btree_trans *trans, @@ -600,22 +609,24 @@ int __bch2_btree_path_relock(struct btree_trans *trans, bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans, struct btree_path *path, - unsigned new_locks_want) + unsigned new_locks_want, + struct get_locks_fail *f) { EBUG_ON(path->locks_want >= new_locks_want); path->locks_want = new_locks_want; - return btree_path_get_locks(trans, path, true); + return btree_path_get_locks(trans, path, true, f); } bool __bch2_btree_path_upgrade(struct btree_trans *trans, struct btree_path *path, - unsigned new_locks_want) + unsigned new_locks_want, + struct get_locks_fail *f) { struct btree_path *linked; - if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want)) + if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f)) return true; /* @@ -644,7 +655,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, linked->btree_id == path->btree_id && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; - btree_path_get_locks(trans, linked, 
true); + btree_path_get_locks(trans, linked, true, NULL); } return false; @@ -656,6 +667,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, { unsigned l; + if (trans->restarted) + return; + EBUG_ON(path->locks_want < new_locks_want); path->locks_want = new_locks_want; @@ -674,6 +688,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans, } bch2_btree_path_verify_locks(path); + + path->downgrade_seq++; + trace_path_downgrade(trans, _RET_IP_, path); } /* Btree transaction locking: */ @@ -682,6 +699,9 @@ void bch2_trans_downgrade(struct btree_trans *trans) { struct btree_path *path; + if (trans->restarted) + return; + trans_for_each_path(trans, path) bch2_btree_path_downgrade(trans, path); } diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 6231e9ffc5d7..11b0a2c8cd69 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -355,26 +355,36 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans, /* upgrade */ + +struct get_locks_fail { + unsigned l; + struct btree *b; +}; + bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *, - struct btree_path *, unsigned); + struct btree_path *, unsigned, + struct get_locks_fail *); + bool __bch2_btree_path_upgrade(struct btree_trans *, - struct btree_path *, unsigned); + struct btree_path *, unsigned, + struct get_locks_fail *); static inline int bch2_btree_path_upgrade(struct btree_trans *trans, struct btree_path *path, unsigned new_locks_want) { + struct get_locks_fail f; unsigned old_locks_want = path->locks_want; new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); if (path->locks_want < new_locks_want - ? __bch2_btree_path_upgrade(trans, path, new_locks_want) + ? 
__bch2_btree_path_upgrade(trans, path, new_locks_want, &f) : path->uptodate == BTREE_ITER_UPTODATE) return 0; trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path, - old_locks_want, new_locks_want); + old_locks_want, new_locks_want, &f); return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); } diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 53ddcaf042a2..8140b6e6e9a6 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -861,12 +861,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags */ bch2_journal_res_put(&c->journal, &trans->journal_res); - if (unlikely(ret)) - return ret; - - bch2_trans_downgrade(trans); - - return 0; + return ret; } static int journal_reclaim_wait_done(struct bch_fs *c) @@ -1135,6 +1130,8 @@ out: if (likely(!(flags & BTREE_INSERT_NOCHECK_RW))) bch2_write_ref_put(c, BCH_WRITE_REF_trans); out_reset: + if (!ret) + bch2_trans_downgrade(trans); bch2_trans_reset_updates(trans); return ret; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index a039ce4a4809..ecbb44b939a0 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -228,6 +228,8 @@ struct btree_path { u8 sorted_idx; u8 ref; u8 intent_ref; + u32 alloc_seq; + u32 downgrade_seq; /* btree_iter_copy starts here: */ struct bpos pos; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 818a83f35d27..d029e0348c91 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1987,7 +1987,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, out: if (new_path) bch2_path_put(trans, new_path, true); - bch2_btree_path_downgrade(trans, iter->path); + bch2_trans_downgrade(trans); return ret; err: bch2_btree_node_free_never_used(as, trans, n); diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index d116f2f03db2..0771a6d880bf 100644 --- 
a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -162,11 +162,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, if (((1U << i) & m->data_opts.rewrite_ptrs) && (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && !ptr->cached) { - bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr); - /* - * See comment below: bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr); - */ rewrites_found |= 1U << i; } i++; @@ -212,14 +208,8 @@ restart_drop_extra_replicas: if (!p.ptr.cached && durability - ptr_durability >= m->op.opts.data_replicas) { durability -= ptr_durability; - bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), &entry->ptr); - /* - * Currently, we're dropping unneeded replicas - * instead of marking them as cached, since - * cached data in stripe buckets prevents them - * from being reused: + bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr); - */ goto restart_drop_extra_replicas; } } diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 81f72b2add09..893304a1f06e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1043,13 +1043,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, TP_ARGS(trans, caller_ip, path) ); +struct get_locks_fail; + TRACE_EVENT(trans_restart_upgrade, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, struct btree_path *path, unsigned old_locks_want, - unsigned new_locks_want), - TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want), + unsigned new_locks_want, + struct get_locks_fail *f), + TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f), TP_STRUCT__entry( __array(char, trans_fn, 32 ) @@ -1057,6 +1060,11 @@ TRACE_EVENT(trans_restart_upgrade, __field(u8, btree_id ) __field(u8, old_locks_want ) __field(u8, new_locks_want ) + __field(u8, level ) + __field(u32, path_seq ) + __field(u32, node_seq ) + __field(u32, path_alloc_seq ) + __field(u32, downgrade_seq) TRACE_BPOS_entries(pos) ), @@ -1066,10 +1074,15 @@ 
TRACE_EVENT(trans_restart_upgrade, __entry->btree_id = path->btree_id; __entry->old_locks_want = old_locks_want; __entry->new_locks_want = new_locks_want; + __entry->level = f->l; + __entry->path_seq = path->l[f->l].lock_seq; + __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq; + __entry->path_alloc_seq = path->alloc_seq; + __entry->downgrade_seq = path->downgrade_seq; TRACE_BPOS_assign(pos, path->pos) ), - TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u", + TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u", __entry->trans_fn, (void *) __entry->caller_ip, bch2_btree_id_str(__entry->btree_id), @@ -1077,7 +1090,12 @@ TRACE_EVENT(trans_restart_upgrade, __entry->pos_offset, __entry->pos_snapshot, __entry->old_locks_want, - __entry->new_locks_want) + __entry->new_locks_want, + __entry->level, + __entry->path_seq, + __entry->node_seq, + __entry->path_alloc_seq, + __entry->downgrade_seq) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, @@ -1238,6 +1256,27 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced, __entry->new_u64s) ); +TRACE_EVENT(path_downgrade, + TP_PROTO(struct btree_trans *trans, + unsigned long caller_ip, + struct btree_path *path), + TP_ARGS(trans, caller_ip, path), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + ), + + TP_printk("%s %pS", + __entry->trans_fn, + (void *) __entry->caller_ip) +); + DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), -- cgit v1.2.3 From df94cb2e57b2cc539f325003e7abb76d3060d55b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 27 Oct 2023 17:19:31 -0400 Subject: bcachefs: Fix an integer overflow Fixes: bcachefs (e7fdc10e-54a3-49d9-bd0c-390370889d84): 
disk usage increased 4294967296 more than 2823707312 sectors reserved) transaction updates for __bchfs_fallocate journal seq 467859 update: btree=extents cached=0 bch2_trans_update+0x4e8/0x540 old u64s 5 type deleted 536925940:3559337304:4294967283 len 0 ver 0 new u64s 6 type reservation 536925940:3559337304:4294967283 len 3559337304 ver 0: generation 0 replicas 2 update: btree=inodes cached=1 bch2_extent_update_i_size_sectors+0x305/0x3b0 old u64s 19 type inode_v3 0:536925940:4294967283 len 0 ver 0: mode 100600 flags 15300000 journal_seq 467859 bi_size 0 bi_sectors 0 bi_version 0 bi_atime 40905301656446 bi_ctime 40905301656446 bi_mtime 40905301656446 bi_otime 40905301656446 bi_uid 0 bi_gid 0 bi_nlink 0 bi_generation 0 bi_dev 0 bi_data_checksum 0 bi_compression 0 bi_project 0 bi_background_compression 0 bi_data_replicas 0 bi_promote_target 0 bi_foreground_target 0 bi_background_target 0 bi_erasure_code 0 bi_fields_set 0 bi_dir 1879048193 bi_dir_offset 3384856038735393365 bi_subvol 0 bi_parent_subvol 0 bi_nocow 0 new u64s 19 type inode_v3 0:536925940:4294967283 len 0 ver 0: mode 100600 flags 15300000 journal_seq 467859 bi_size 0 bi_sectors 3559337304 bi_version 0 bi_atime 40905301656446 bi_ctime 40905301656446 bi_mtime 40905301656446 bi_otime 40905301656446 bi_uid 0 bi_gid 0 bi_nlink 0 bi_generation 0 bi_dev 0 bi_data_checksum 0 bi_compression 0 bi_project 0 bi_background_compression 0 bi_data_replicas 0 bi_promote_target 0 bi_foreground_target 0 bi_background_target 0 bi_erasure_code 0 bi_fields_set 0 bi_dir 1879048193 bi_dir_offset 3384856038735393365 bi_subvol 0 bi_parent_subvol 0 bi_nocow 0 Kernel panic - not syncing: bcachefs (e7fdc10e-54a3-49d9-bd0c-390370889d84): panic after error CPU: 4 PID: 5154 Comm: rsync Not tainted 6.5.9-gateway-gca1614174cc0-dirty #1 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./X570 Phantom Gaming 4, BIOS P4.20 08/02/2021 Call Trace: dump_stack_lvl+0x5a/0x90 panic+0x105/0x300 ? console_unlock+0xf1/0x130 ? 
bch2_printbuf_exit+0x16/0x30 ? srso_return_thunk+0x5/0x10 bch2_inconsistent_error+0x6f/0x80 bch2_trans_fs_usage_apply+0x279/0x3d0 __bch2_trans_commit+0x112a/0x1df0 ? bch2_extent_update+0x13a/0x1d0 bch2_extent_update+0x13a/0x1d0 bch2_extent_fallocate+0x58e/0x740 bch2_fallocate_dispatch+0xb7c/0x1030 ? do_filp_open+0xa0/0x140 vfs_fallocate+0x18e/0x1d0 __x64_sys_fallocate+0x46/0x70 do_syscall_64+0x48/0xa0 ? exit_to_user_mode_prepare+0x4d/0xa0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0033:0x7fc85d91bbb3 Code: 64 89 02 b8 ff ff ff ff eb bd 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 80 3d 31 da 0d 00 00 49 89 ca 74 14 b8 1d 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 5d c3 0f 1f 40 00 48 83 ec 28 48 89 54 24 10 Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 4 ++-- fs/bcachefs/io_misc.c | 4 ++-- fs/bcachefs/io_misc.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2acd727d3f9b..58d8c6ffd955 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1322,7 +1322,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, struct bch_fs *c = trans->c; static int warned_disk_usage = 0; bool warn = false; - unsigned disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; + u64 disk_res_sectors = trans->disk_res ? 
trans->disk_res->sectors : 0; struct replicas_delta *d, *d2; struct replicas_delta *top = (void *) deltas->d + deltas->used; struct bch_fs_usage *dst; @@ -1381,7 +1381,7 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans, if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) bch2_trans_inconsistent(trans, - "disk usage increased %lli more than %u sectors reserved)", + "disk usage increased %lli more than %llu sectors reserved)", should_not_have_added, disk_res_sectors); return 0; need_mark: diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 0979d5e05713..bebc11444ef5 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -23,7 +23,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, subvol_inum inum, struct btree_iter *iter, - unsigned sectors, + u64 sectors, struct bch_io_opts opts, s64 *i_sectors_delta, struct write_point_specifier write_point) @@ -105,7 +105,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, if (ret) goto err; - sectors = min(sectors, wp->sectors_free); + sectors = min_t(u64, sectors, wp->sectors_free); sectors_allocated = sectors; bch2_key_resize(&e->k, sectors); diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index c9e6ed40e1b8..9cb44a7c43c1 100644 --- a/fs/bcachefs/io_misc.h +++ b/fs/bcachefs/io_misc.h @@ -3,7 +3,7 @@ #define _BCACHEFS_IO_MISC_H int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, - unsigned, struct bch_io_opts, s64 *, + u64, struct bch_io_opts, s64 *, struct write_point_specifier); int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, subvol_inum, u64, s64 *); -- cgit v1.2.3 From dc7a15fb90bf658be8289c9540c11f50993d10ff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 27 Oct 2023 19:37:24 -0400 Subject: bcachefs: Skip deleted members in member_to_text() This fixes show-super output - we shouldn't be printing members that have been deleted. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 032fe45481d3..ab5de12eca4a 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -168,6 +168,9 @@ static void member_to_text(struct printbuf *out, u64 bucket_size = le16_to_cpu(m.bucket_size); u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; + if (!bch2_member_exists(&m)) + return; + prt_printf(out, "Device:"); prt_tab(out); prt_printf(out, "%u", i); @@ -304,10 +307,8 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); unsigned i; - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member m = members_v1_get(mi, i); - member_to_text(out, m, gi, sb, i); - } + for (i = 0; i < sb->nr_devices; i++) + member_to_text(out, members_v1_get(mi, i), gi, sb, i); } const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { @@ -322,10 +323,8 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); unsigned i; - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member m = members_v2_get(mi, i); - member_to_text(out, m, gi, sb, i); - } + for (i = 0; i < sb->nr_devices; i++) + member_to_text(out, members_v2_get(mi, i), gi, sb, i); } static int bch2_sb_members_v2_validate(struct bch_sb *sb, -- cgit v1.2.3 From 1f7056b735d59843faee70f504f71e1fbffc51d8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Oct 2023 13:15:36 -0400 Subject: bcachefs: Ensure copygc does not spin If copygc does no work - finds no fragmented buckets - wait for a bit of IO to happen. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 11 +++++++++++ fs/bcachefs/alloc_background.h | 1 + fs/bcachefs/movinggc.c | 20 ++++++++++++++++++-- fs/bcachefs/rebalance.c | 10 ++++------ 4 files changed, 34 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c342ec3b0385..bcfae91667af 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2085,6 +2085,17 @@ void bch2_recalc_capacity(struct bch_fs *c) closure_wake_up(&c->freelist_wait); } +u64 bch2_min_rw_member_capacity(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + u64 ret = U64_MAX; + + for_each_rw_member(ca, c, i) + ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size); + return ret; +} + static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca) { struct open_bucket *ob; diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index e1ce38ef052e..73faf99a222a 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -249,6 +249,7 @@ int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64); int bch2_fs_freespace_init(struct bch_fs *); void bch2_recalc_capacity(struct bch_fs *); +u64 bch2_min_rw_member_capacity(struct bch_fs *); void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *); void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index f73b9b7f4bf7..e0efa5282a77 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -188,7 +188,8 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, noinline static int bch2_copygc(struct moving_context *ctxt, - struct buckets_in_flight *buckets_in_flight) + struct buckets_in_flight *buckets_in_flight, + bool *did_work) { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; @@ -224,6 +225,8 @@ static int bch2_copygc(struct 
moving_context *ctxt, f->bucket.k.gen, data_opts); if (ret) goto err; + + *did_work = true; } err: darray_exit(&buckets); @@ -322,6 +325,8 @@ static int bch2_copygc_thread(void *arg) false); while (!ret && !kthread_should_stop()) { + bool did_work = false; + bch2_trans_unlock(ctxt.trans); cond_resched(); @@ -352,10 +357,21 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; - ret = bch2_copygc(&ctxt, &buckets); + ret = bch2_copygc(&ctxt, &buckets, &did_work); c->copygc_running = false; wake_up(&c->copygc_running_wq); + + if (!wait && !did_work) { + u64 min_member_capacity = bch2_min_rw_member_capacity(c); + + if (min_member_capacity == U64_MAX) + min_member_capacity = 128 * 2048; + + bch2_trans_unlock_long(ctxt.trans); + bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), + MAX_SCHEDULE_TIMEOUT); + } } move_buckets_wait(&ctxt, &buckets, true); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 6ee4d2e02073..82014cc6e271 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "alloc_background.h" #include "alloc_foreground.h" #include "btree_iter.h" #include "btree_update.h" @@ -282,15 +283,12 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie) static void rebalance_wait(struct bch_fs *c) { struct bch_fs_rebalance *r = &c->rebalance; - struct bch_dev *ca; struct io_clock *clock = &c->io_clock[WRITE]; u64 now = atomic64_read(&clock->now); - u64 min_member_capacity = 128 * 2048; - unsigned i; + u64 min_member_capacity = bch2_min_rw_member_capacity(c); - for_each_rw_member(ca, c, i) - min_member_capacity = min(min_member_capacity, - ca->mi.nbuckets * ca->mi.bucket_size); + if (min_member_capacity == U64_MAX) + min_member_capacity = 128 * 2048; r->wait_iotime_end = now + (min_member_capacity >> 6); -- cgit v1.2.3 From 4db8ac8629b1ee75316849b0e8ea5bbf90335706 Mon Sep 17 
00:00:00 2001 From: Kent Overstreet Date: Tue, 31 Oct 2023 12:29:58 -0400 Subject: bcachefs: Fix MEAN_AND_VARIANCE kconfig options Fixes: https://lore.kernel.org/linux-bcachefs/CAMuHMdXpwMdLuoWsNGa8qacT_5Wv-vSTz0xoBR5n_fnD9cNOuQ@mail.gmail.com/ Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index df13a4f9a6e3..87dc2e934ad7 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -24,7 +24,6 @@ config BCACHEFS_FS select XXHASH select SRCU select SYMBOLIC_ERRNAME - select MEAN_AND_VARIANCE help The bcachefs filesystem - a modern, copy on write filesystem, with support for multiple devices, compression, checksumming, etc. @@ -78,7 +77,7 @@ config BCACHEFS_NO_LATENCY_ACCT config MEAN_AND_VARIANCE_UNIT_TEST tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS depends on KUNIT - select MEAN_AND_VARIANCE + depends on BCACHEFS_FS default KUNIT_ALL_TESTS help This option enables the kunit tests for mean_and_variance module. -- cgit v1.2.3 From 6dfa10ab22a6a322269a3454d7ac720dc2f8bf11 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 31 Oct 2023 18:05:22 -0400 Subject: bcachefs: Fix build errors with gcc 10 gcc 10 seems to complain about array bounds in situations where gcc 11 does not - curious. This unfortunately requires adding some casts for now; we may investigate getting rid of our __u64 _data[] VLA in a future patch so that our start[0] members can be VLAs. 
Reported-by: John Stoffel Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 4 +--- fs/bcachefs/btree_trans_commit.c | 6 +++--- fs/bcachefs/btree_update_interior.c | 2 +- fs/bcachefs/btree_update_interior.h | 2 +- fs/bcachefs/recovery.c | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 29b000c6b7e1..5b44598b9df9 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1617,9 +1617,7 @@ struct journal_seq_blacklist_entry { struct bch_sb_field_journal_seq_blacklist { struct bch_sb_field field; - - struct journal_seq_blacklist_entry start[0]; - __u64 _data[]; + struct journal_seq_blacklist_entry start[]; }; struct bch_sb_field_errors { diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 8140b6e6e9a6..32693f7c6221 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -681,7 +681,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_overwrite, i->btree_id, i->level, i->old_k.u64s); - bkey_reassemble(&entry->start[0], + bkey_reassemble((struct bkey_i *) entry->start, (struct bkey_s_c) { &i->old_k, i->old_v }); } @@ -689,7 +689,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_btree_keys, i->btree_id, i->level, i->k->k.u64s); - bkey_copy(&entry->start[0], i->k); + bkey_copy((struct bkey_i *) entry->start, i->k); } trans_for_each_wb_update(trans, wb) { @@ -697,7 +697,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, BCH_JSET_ENTRY_btree_keys, wb->btree, 0, wb->k.k.u64s); - bkey_copy(&entry->start[0], &wb->k); + bkey_copy((struct bkey_i *) entry->start, &wb->k); } if (trans->journal_seq) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index d029e0348c91..89ada89eafe7 100644 --- 
a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2411,7 +2411,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry r->level = entry->level; r->alive = true; - bkey_copy(&r->key, &entry->start[0]); + bkey_copy(&r->key, (struct bkey_i *) entry->start); mutex_unlock(&c->btree_root_lock); } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 5e0a467fe905..d92b3cf5f5e0 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -271,7 +271,7 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree_node_entry *bne = max(write_block(b), (void *) btree_bkey_last(b, bset_tree_last(b))); ssize_t remaining_space = - __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]); + __bch_btree_u64s_remaining(c, b, bne->keys.start); if (unlikely(bset_written(b, bset(b, t)))) { if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index f73338f37bf1..9600b8083175 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -226,7 +226,7 @@ static int journal_replay_entry_early(struct bch_fs *c, if (entry->u64s) { r->level = entry->level; - bkey_copy(&r->key, &entry->start[0]); + bkey_copy(&r->key, (struct bkey_i *) entry->start); r->error = 0; } else { r->error = -EIO; -- cgit v1.2.3 From c4accde498dd7db8352d574958d19a5f710aba69 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Oct 2023 12:30:52 -0400 Subject: bcachefs: Ensure srcu lock is not held too long The SRCU read lock that btree_trans takes exists to make it safe for bch2_trans_relock() to deref pointers to btree nodes/key cache items we don't have locked, but as a side effect it blocks reclaim from freeing those items. 
Thus, it's important to not hold it for too long: we need to differentiate between bch2_trans_unlock() calls that will be only for a short duration, and ones that will be for an unbounded duration. This introduces bch2_trans_unlock_long(), to be used mainly by the data move paths. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 42 +++++++++++++++++++++++++++++------------- fs/bcachefs/btree_iter.h | 4 ++++ fs/bcachefs/btree_locking.c | 6 ++++++ fs/bcachefs/btree_types.h | 1 + 4 files changed, 40 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 0622f729411f..b22fd395a1fd 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, if (unlikely(ret)) goto out; + if (unlikely(!trans->srcu_held)) + bch2_trans_srcu_lock(trans); + /* * Ensure we obey path->should_be_locked: if it's set, we can't unlock * and re-traverse the path without a transaction restart: @@ -2830,18 +2833,28 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } -static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans) +void bch2_trans_srcu_unlock(struct btree_trans *trans) { - struct bch_fs *c = trans->c; - struct btree_path *path; + if (trans->srcu_held) { + struct bch_fs *c = trans->c; + struct btree_path *path; - trans_for_each_path(trans, path) - if (path->cached && !btree_node_locked(path, 0)) - path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); + trans_for_each_path(trans, path) + if (path->cached && !btree_node_locked(path, 0)) + path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); - srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); - trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); - trans->srcu_lock_time = jiffies; + srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + trans->srcu_held = false; + } +} + +void 
bch2_trans_srcu_lock(struct btree_trans *trans) +{ + if (!trans->srcu_held) { + trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier); + trans->srcu_lock_time = jiffies; + trans->srcu_held = true; + } } /** @@ -2895,8 +2908,9 @@ u32 bch2_trans_begin(struct btree_trans *trans) } trans->last_begin_time = now; - if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) - bch2_trans_reset_srcu_lock(trans); + if (unlikely(trans->srcu_held && + time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) + bch2_trans_srcu_unlock(trans); trans->last_begin_ip = _RET_IP_; if (trans->restarted) { @@ -2981,8 +2995,9 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->wb_updates_size = s->wb_updates_size; } - trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; + trans->srcu_held = true; if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) { struct btree_trans *pos; @@ -3059,7 +3074,8 @@ void bch2_trans_put(struct btree_trans *trans) check_btree_paths_leaked(trans); - srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + if (trans->srcu_held) + srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); bch2_journal_preres_put(&c->journal, &trans->journal_preres); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 70759ee3e5c7..5e103f519e62 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool); int bch2_trans_relock(struct btree_trans *); int bch2_trans_relock_notrace(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); +void bch2_trans_unlock_long(struct btree_trans *); bool bch2_trans_locked(struct btree_trans *); static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count) @@ -579,6 +580,9 @@ static inline int 
__bch2_bkey_get_val_typed(struct btree_trans *trans, __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags, \ KEY_TYPE_##_type, sizeof(*_val), _val) +void bch2_trans_srcu_unlock(struct btree_trans *); +void bch2_trans_srcu_lock(struct btree_trans *); + u32 bch2_trans_begin(struct btree_trans *); /* diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index bc45cd2a34a4..3d48834d091f 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -753,6 +753,12 @@ void bch2_trans_unlock(struct btree_trans *trans) __bch2_btree_path_unlock(trans, path); } +void bch2_trans_unlock_long(struct btree_trans *trans) +{ + bch2_trans_unlock(trans); + bch2_trans_srcu_unlock(trans); +} + bool bch2_trans_locked(struct btree_trans *trans) { struct btree_path *path; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index ecbb44b939a0..7cc8d6b12161 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -426,6 +426,7 @@ struct btree_trans { u8 nr_updates; u8 nr_wb_updates; u8 wb_updates_size; + bool srcu_held:1; bool used_mempool:1; bool in_traverse_all:1; bool paths_sorted:1; -- cgit v1.2.3 From f82755e4e8b83a4a98ebd6d819d716547fe11919 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Oct 2023 15:13:09 -0400 Subject: bcachefs: Data move path now uses bch2_trans_unlock_long() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 12 +++++++++++- fs/bcachefs/move.c | 13 ++++++++----- fs/bcachefs/move.h | 1 + fs/bcachefs/movinggc.c | 4 ++-- fs/bcachefs/rebalance.c | 2 +- 5 files changed, 23 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index b22fd395a1fd..af98545e0f35 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2833,6 +2833,13 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } +static inline void check_srcu_held_too_long(struct btree_trans *trans) +{ + 
WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10), + "btree trans held srcu lock (delaying memory reclaim) for %lu seconds", + (jiffies - trans->srcu_lock_time) / HZ); +} + void bch2_trans_srcu_unlock(struct btree_trans *trans) { if (trans->srcu_held) { @@ -2843,6 +2850,7 @@ void bch2_trans_srcu_unlock(struct btree_trans *trans) if (path->cached && !btree_node_locked(path, 0)) path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); + check_srcu_held_too_long(trans); srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); trans->srcu_held = false; } @@ -3074,8 +3082,10 @@ void bch2_trans_put(struct btree_trans *trans) check_btree_paths_leaked(trans); - if (trans->srcu_held) + if (trans->srcu_held) { + check_srcu_held_too_long(trans); srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + } bch2_journal_preres_put(&c->journal, &trans->journal_preres); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 1b15b010461a..ab749bf2fcbc 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -147,9 +147,8 @@ void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt) { struct moving_io *io; - bch2_trans_unlock(ctxt->trans); - while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) { + bch2_trans_unlock_long(ctxt->trans); list_del(&io->read_list); move_write(io); } @@ -485,8 +484,8 @@ int bch2_move_ratelimit(struct moving_context *ctxt) struct bch_fs *c = ctxt->trans->c; u64 delay; - if (ctxt->wait_on_copygc) { - bch2_trans_unlock(ctxt->trans); + if (ctxt->wait_on_copygc && !c->copygc_running) { + bch2_trans_unlock_long(ctxt->trans); wait_event_killable(c->copygc_running_wq, !c->copygc_running || kthread_should_stop()); @@ -495,8 +494,12 @@ int bch2_move_ratelimit(struct moving_context *ctxt) do { delay = ctxt->rate ? 
bch2_ratelimit_delay(ctxt->rate) : 0; + if (delay) { - bch2_trans_unlock(ctxt->trans); + if (delay > HZ / 10) + bch2_trans_unlock_long(ctxt->trans); + else + bch2_trans_unlock(ctxt->trans); set_current_state(TASK_INTERRUPTIBLE); } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 1b1e8678bfae..07cf9d42643b 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -45,6 +45,7 @@ do { \ \ if (_cond) \ break; \ + bch2_trans_unlock_long((_ctxt)->trans); \ __wait_event((_ctxt)->wait, \ bch2_moving_ctxt_next_pending_write(_ctxt) || \ (cond_finished = (_cond))); \ diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index e0efa5282a77..4d955f3cc5b2 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -128,7 +128,7 @@ static void move_buckets_wait(struct moving_context *ctxt, kfree(i); } - bch2_trans_unlock(ctxt->trans); + bch2_trans_unlock_long(ctxt->trans); } static bool bucket_in_flight(struct buckets_in_flight *list, @@ -327,7 +327,7 @@ static int bch2_copygc_thread(void *arg) while (!ret && !kthread_should_stop()) { bool did_work = false; - bch2_trans_unlock(ctxt.trans); + bch2_trans_unlock_long(ctxt.trans); cond_resched(); if (!c->copy_gc_enabled) { diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 82014cc6e271..3319190b8d9c 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -348,7 +348,7 @@ static int do_rebalance(struct moving_context *ctxt) !kthread_should_stop() && !atomic64_read(&r->work_stats.sectors_seen) && !atomic64_read(&r->scan_stats.sectors_seen)) { - bch2_trans_unlock(trans); + bch2_trans_unlock_long(trans); rebalance_wait(c); } -- cgit v1.2.3 From 385a82f62a9b46d84d545062375274bdc6f50c37 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 1 Nov 2023 15:02:44 -0400 Subject: bcachefs: serialize on cached key in early bucket allocator bcachefs had a transient bug where freespace_initialized was not properly being set, which led to unexpected use of the early bucket allocator at
runtime. This issue has been fixed, but the existence of it uncovered a coherency issue in the early bucket allocation code that is somewhat related to how uncached iterators deal with the key cache. The problem itself manifests as occasional failure of generic/113 due to corruption, often seen as a duplicate backpointer or multiple data types per-bucket error. The immediate cause of the error is a racing bucket allocation along the lines of the following sequence: - Task 1 selects key A in bch2_bucket_alloc_early() and schedules. - Task 2 selects the same key A, but proceeds to complete the allocation and associated I/O, after which it releases the open_bucket. - Task 1 resumes with key A, but does not recognize the bucket is now allocated because the open_bucket has been removed from the hash when it was released in the previous step. This generally shouldn't happen because the allocating task updates the alloc btree key before releasing the bucket. This is not sufficient in this particular instance, however, because an uncached iterator for a cached btree doesn't actually lock the key cache slot when no key exists for a given slot in the cache. Thus the fact that the allocation side updates the cached key means that multiple uncached iters can stumble across the same alloc key and duplicate the bucket allocation as described above. This is something that probably needs a longer term fix in the iterator code. As a short term fix, close the race through explicit use of a cached iterator for likely allocation candidates. We don't want to scan the btree with a cached iterator because that would unnecessarily pollute the cache. This mitigates cache pollution by primarily scanning the tree with an uncached iterator, but closes the race by creating a key cache entry for any prospective slot prior to the bucket allocation attempt (also similar to how _alloc_freelist() works via try_alloc_bucket()). 
This survives many iterations of generic/113 on a kernel hacked to always use the early bucket allocator. Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 3bc4abd3d7d5..75ecfc3472ce 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -399,12 +399,22 @@ bch2_bucket_alloc_early(struct btree_trans *trans, struct bucket_alloc_state *s, struct closure *cl) { - struct btree_iter iter; - struct bkey_s_c k; + struct btree_iter iter, citer; + struct bkey_s_c k, ck; struct open_bucket *ob = NULL; u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor)); int ret; + + /* + * Scan with an uncached iterator to avoid polluting the key cache. An + * uncached iter will return a cached key if one exists, but if not + * there is no other underlying protection for the associated key cache + * slot. To avoid racing bucket allocations, look up the cached key slot + * of any likely allocation candidate before attempting to proceed with + * the allocation. This provides proper exclusion on the associated + * bucket. 
+ */ again: for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor), BTREE_ITER_SLOTS, k, ret) { @@ -419,13 +429,25 @@ again: continue; a = bch2_alloc_to_v4(k, &a_convert); - if (a->data_type != BCH_DATA_free) continue; + /* now check the cached key to serialize concurrent allocs of the bucket */ + ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED); + ret = bkey_err(ck); + if (ret) + break; + + a = bch2_alloc_to_v4(ck, &a_convert); + if (a->data_type != BCH_DATA_free) + goto next; + s->buckets_seen++; ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl); +next: + citer.path->preserve = false; + bch2_trans_iter_exit(trans, &citer); if (ob) break; } -- cgit v1.2.3 From e0fb0dccfd6fd8dd9c07adc6eafb1a12e2121a36 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 1 Nov 2023 15:02:45 -0400 Subject: bcachefs: update alloc cursor in early bucket allocator A recent bug report uncovered a scenario where a filesystem never runs with freespace_initialized, and therefore the user observes significantly degraded write performance by virtue of running the early bucket allocator. The associated bug aside, the primary cause of the performance drop in this particular instance is that the early bucket allocator does not update the allocation cursor. This means that every allocation walks the alloc btree from the first bucket of the associated device looking for a bucket marked as free space. Update the early allocator code to set the alloc cursor to the last processed position in the tree, similar to how the freelist allocator behaves. With the alloc_cursor being updated, the retry logic also needs to be updated to restart from the beginning of the device when a free bucket is not available between the cursor and the end of the device. Track the restart position in a first_bucket variable to make the code a bit more easily readable and consistent with the freelist allocator. 
Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 75ecfc3472ce..b85c7765272f 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -402,8 +402,9 @@ bch2_bucket_alloc_early(struct btree_trans *trans, struct btree_iter iter, citer; struct bkey_s_c k, ck; struct open_bucket *ob = NULL; - u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); - u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor)); + u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); + u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor)); + u64 alloc_cursor = alloc_start; int ret; /* @@ -453,13 +454,14 @@ next: } bch2_trans_iter_exit(trans, &iter); + alloc_cursor = iter.pos.offset; ca->alloc_cursor = alloc_cursor; if (!ob && ret) ob = ERR_PTR(ret); - if (!ob && alloc_cursor > alloc_start) { - alloc_cursor = alloc_start; + if (!ob && alloc_start > first_bucket) { + alloc_cursor = alloc_start = first_bucket; goto again; } -- cgit v1.2.3 From 0e91d3a6d59ed3c6630c7c50f17534f2b02d2abf Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 1 Nov 2023 09:01:02 -0400 Subject: bcachefs: fix odebug warn and lockdep splat due to on-stack rhashtable Guenter Roeck reports a lockdep splat and DEBUG_OBJECTS_WORK related warning when bch2_copygc_thread() initializes its rhashtable. The lockdep splat relates to a warning print caused by the fact that the rhashtable exists on the stack but is not annotated as such. This is something that could be addressed by INIT_WORK_ONSTACK(), but rhashtable doesn't expose that control and probably isn't worth the churn for just one user. Instead, dynamically allocate the buckets_in_flight structure and avoid the splat that way.
Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 4d955f3cc5b2..0a0576326c5b 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -305,14 +305,16 @@ static int bch2_copygc_thread(void *arg) struct moving_context ctxt; struct bch_move_stats move_stats; struct io_clock *clock = &c->io_clock[WRITE]; - struct buckets_in_flight buckets; + struct buckets_in_flight *buckets; u64 last, wait; int ret = 0; - memset(&buckets, 0, sizeof(buckets)); - - ret = rhashtable_init(&buckets.table, &bch_move_bucket_params); + buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL); + if (!buckets) + return -ENOMEM; + ret = rhashtable_init(&buckets->table, &bch_move_bucket_params); if (ret) { + kfree(buckets); bch_err_msg(c, ret, "allocating copygc buckets in flight"); return ret; } @@ -331,12 +333,12 @@ static int bch2_copygc_thread(void *arg) cond_resched(); if (!c->copy_gc_enabled) { - move_buckets_wait(&ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); kthread_wait_freezable(c->copy_gc_enabled); } if (unlikely(freezing(current))) { - move_buckets_wait(&ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); __refrigerator(false); continue; } @@ -347,7 +349,7 @@ static int bch2_copygc_thread(void *arg) if (wait > clock->max_slop) { c->copygc_wait_at = last; c->copygc_wait = last + wait; - move_buckets_wait(&ctxt, &buckets, true); + move_buckets_wait(&ctxt, buckets, true); trace_and_count(c, copygc_wait, c, wait, last + wait); bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT); @@ -357,7 +359,7 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; - ret = bch2_copygc(&ctxt, &buckets, &did_work); + ret = bch2_copygc(&ctxt, buckets, 
&did_work); c->copygc_running = false; wake_up(&c->copygc_running_wq); @@ -374,8 +376,10 @@ static int bch2_copygc_thread(void *arg) } } - move_buckets_wait(&ctxt, &buckets, true); - rhashtable_destroy(&buckets.table); + move_buckets_wait(&ctxt, buckets, true); + + rhashtable_destroy(&buckets->table); + kfree(buckets); bch2_moving_ctxt_exit(&ctxt); bch2_move_stats_exit(&move_stats, c); -- cgit v1.2.3 From 2a4e7497604b20b19b6d9dbd109c42900892d7c9 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Nov 2023 09:09:36 -0400 Subject: bcachefs: allow writeback to fill bio completely The bcachefs folio writeback code includes a bio full check as well as a fixed size check to determine when to split off and submit writeback I/O. The inclusive check of the latter against the limit means that writeback can submit slightly prematurely. This is not a functional problem, but results in unnecessarily split I/Os and extent merging. This can be observed with a buffered write sized exactly to the current maximum value (1MB) and with key_merging_disabled=1. The latter prevents the merge from the second write such that a subsequent check of the extent list shows a 1020k extent followed by a contiguous 4k extent. The purpose for the fixed size check is also undocumented and somewhat obscure. Lift this check into a new helper that wraps the bio check, fix the comparison logic, and add a comment to document the purpose and how we might improve on this in the future. 
Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-buffered.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 58ccc7b91ac7..52f0e7acda3d 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -389,6 +389,21 @@ static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs return ret; } +/* + * Determine when a writepage io is full. We have to limit writepage bios to a + * single page per bvec (i.e. 1MB with 4k pages) because that is the limit to + * what the bounce path in bch2_write_extent() can handle. In theory we could + * loosen this restriction for non-bounce I/O, but we don't have that context + * here. Ideally, we can up this limit and make it configurable in the future + * when the bounce path can be enhanced to accommodate larger source bios. + */ +static inline bool bch_io_full(struct bch_writepage_io *io, unsigned len) +{ + struct bio *bio = &io->op.wbio.bio; + return bio_full(bio, len) || + (bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE); +} + static void bch2_writepage_io_done(struct bch_write_op *op) { struct bch_writepage_io *io = @@ -606,9 +621,7 @@ do_io: if (w->io && (w->io->op.res.nr_replicas != nr_replicas_this_write || - bio_full(&w->io->op.wbio.bio, sectors << 9) || - w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= - (BIO_MAX_VECS * PAGE_SIZE) || + bch_io_full(w->io, sectors << 9) || bio_end_sector(&w->io->op.wbio.bio) != sector)) bch2_writepage_do_io(w); -- cgit v1.2.3 From 0996c72a0f300bfedf8df52a8e437435494fc204 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Nov 2023 09:09:37 -0400 Subject: bcachefs: byte order swap bch_alloc_v4.fragmentation_lru field A simple test to populate a filesystem on one CPU architecture and fsck on an arch of the opposite byte order produces errors related to the fragmentation LRU. 
This occurs because the 64-bit fragmentation_lru field is not byte-order swapped when reads detect that the on-disk/bset key values were written in opposite byte-order of the current CPU. Update the bch2_alloc_v4 swab callback to handle fragmentation_lru as is done for other multi-byte fields. This doesn't affect existing filesystems when accessed by CPUs of the same endianness because the ->swab() callback is only called when the bset flags indicate an endianness mismatch between the CPU and on-disk data. Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index bcfae91667af..ad256a88cb5c 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -319,6 +319,7 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->io_time[1] = swab64(a->io_time[1]); a->stripe = swab32(a->stripe); a->nr_external_backpointers = swab32(a->nr_external_backpointers); + a->fragmentation_lru = swab64(a->fragmentation_lru); bps = alloc_v4_backpointers(a); for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) { -- cgit v1.2.3 From 7cb2a7895d94db2979c29e4a20f33b5557c702d5 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 3 Nov 2023 09:09:38 -0400 Subject: bcachefs: use swab40 for bch_backpointer.bucket_offset bitfield The bucket_offset field of bch_backpointer is a 40-bit bitfield, but the bch2_backpointer_swab() helper uses swab32. This leads to inconsistency when an on-disk fs is accessed from an opposite endian machine. As it turns out, we already have an internal swab40() helper that is used from the bch_alloc_v4 swab callback. Lift it into the backpointers header file and use it consistently in both places. 
Signed-off-by: Brian Foster Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 9 --------- fs/bcachefs/backpointers.c | 2 +- fs/bcachefs/backpointers.h | 9 +++++++++ 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ad256a88cb5c..1fec0e67891f 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -297,15 +297,6 @@ fsck_err: return ret; } -static inline u64 swab40(u64 x) -{ - return (((x & 0x00000000ffULL) << 32)| - ((x & 0x000000ff00ULL) << 16)| - ((x & 0x0000ff0000ULL) >> 0)| - ((x & 0x00ff000000ULL) >> 16)| - ((x & 0xff00000000ULL) >> 32)); -} - void bch2_alloc_v4_swab(struct bkey_s k) { struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 3b79bde1ce2f..5ed96dddae08 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -77,7 +77,7 @@ void bch2_backpointer_swab(struct bkey_s k) { struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); - bp.v->bucket_offset = swab32(bp.v->bucket_offset); + bp.v->bucket_offset = swab40(bp.v->bucket_offset); bp.v->bucket_len = swab32(bp.v->bucket_len); bch2_bpos_swab(&bp.v->pos); } diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 4ab9f3562912..ab866feeaf66 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -7,6 +7,15 @@ #include "buckets.h" #include "super.h" +static inline u64 swab40(u64 x) +{ + return (((x & 0x00000000ffULL) << 32)| + ((x & 0x000000ff00ULL) << 16)| + ((x & 0x0000ff0000ULL) >> 0)| + ((x & 0x00ff000000ULL) >> 16)| + ((x & 0xff00000000ULL) >> 32)); +} + int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, enum bkey_invalid_flags, struct printbuf *); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); -- cgit v1.2.3 From 4bd156c4b44ef34bd57d20a0a48aad829e1c54c1 Mon Sep 17 00:00:00 2001 
From: Kent Overstreet Date: Thu, 2 Nov 2023 15:28:15 -0400 Subject: bcachefs: Fix bch2_delete_dead_inodes() - the fsck_err() check for the filesystem being clean was incorrect, causing us to always fail to delete unlinked inodes - if a snapshot had been taken, the unlinked inode needs to be propagated to snapshot leaves so the unlink can happen there - fixed. Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 78 +++++++++++++++++++++++++++++++++++++++-------------- fs/bcachefs/inode.h | 13 +++++++-- 2 files changed, 69 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 925d1b7f2887..789f8958f685 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -362,9 +362,10 @@ int bch2_inode_peek(struct btree_trans *trans, return ret; } -int bch2_inode_write(struct btree_trans *trans, +int bch2_inode_write_flags(struct btree_trans *trans, struct btree_iter *iter, - struct bch_inode_unpacked *inode) + struct bch_inode_unpacked *inode, + enum btree_update_flags flags) { struct bkey_inode_buf *inode_p; @@ -374,7 +375,7 @@ int bch2_inode_write(struct btree_trans *trans, bch2_inode_pack_inlined(inode_p, inode); inode_p->inode.k.p.snapshot = iter->snapshot; - return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); + return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); } struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) @@ -1055,24 +1056,18 @@ err: return ret ?: -BCH_ERR_transaction_restart_nested; } -static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) +static int may_delete_deleted_inode(struct btree_trans *trans, + struct btree_iter *iter, + struct bpos pos, + bool *need_another_pass) { struct bch_fs *c = trans->c; - struct btree_iter iter; + struct btree_iter inode_iter; struct bkey_s_c k; struct bch_inode_unpacked inode; int ret; - if (bch2_snapshot_is_internal_node(c, pos.snapshot)) - return 0; - - if (!fsck_err_on(c->sb.clean, c, 
- deleted_inode_but_clean, - "filesystem marked as clean but have deleted inode %llu:%u", - pos.offset, pos.snapshot)) - return 0; - - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED); + k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_CACHED); ret = bkey_err(k); if (ret) return ret; @@ -1086,7 +1081,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) ret = bch2_inode_unpack(k, &inode); if (ret) - goto err; + goto out; if (fsck_err_on(S_ISDIR(inode.bi_mode), c, deleted_inode_is_dir, @@ -1100,12 +1095,46 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) pos.offset, pos.snapshot)) goto delete; - return 1; -err: + if (c->sb.clean && + !fsck_err(c, + deleted_inode_but_clean, + "filesystem marked as clean but have deleted inode %llu:%u", + pos.offset, pos.snapshot)) { + ret = 0; + goto out; + } + + if (bch2_snapshot_is_internal_node(c, pos.snapshot)) { + struct bpos new_min_pos; + + ret = bch2_propagate_key_to_snapshot_leaves(trans, inode_iter.btree_id, k, &new_min_pos); + if (ret) + goto out; + + inode.bi_flags &= ~BCH_INODE_UNLINKED; + + ret = bch2_inode_write_flags(trans, &inode_iter, &inode, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + bch_err_msg(c, ret, "clearing inode unlinked flag"); + if (ret) + goto out; + + /* + * We'll need another write buffer flush to pick up the new + * unlinked inodes in the snapshot leaves: + */ + *need_another_pass = true; + return 0; + } + + ret = 1; +out: fsck_err: + bch2_trans_iter_exit(trans, &inode_iter); return ret; delete: - return bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); + ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false); + goto out; } int bch2_delete_dead_inodes(struct bch_fs *c) @@ -1113,7 +1142,10 @@ int bch2_delete_dead_inodes(struct bch_fs *c) struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; + bool need_another_pass; int 
ret; +again: + need_another_pass = false; ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) @@ -1127,7 +1159,8 @@ int bch2_delete_dead_inodes(struct bch_fs *c) */ for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = lockrestart_do(trans, may_delete_deleted_inode(trans, k.k->p)); + ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p, + &need_another_pass)); if (ret < 0) break; @@ -1137,12 +1170,17 @@ int bch2_delete_dead_inodes(struct bch_fs *c) bch2_fs_lazy_rw(c); } + bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); + ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; } } bch2_trans_iter_exit(trans, &iter); + + if (!ret && need_another_pass) + goto again; err: bch2_trans_put(trans); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 74c62e6c16cc..b09fda452d6f 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -3,6 +3,7 @@ #define _BCACHEFS_INODE_H #include "bkey.h" +#include "bkey_methods.h" #include "opts.h" enum bkey_invalid_flags; @@ -101,8 +102,16 @@ void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *) int bch2_inode_peek(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *, subvol_inum, unsigned); -int bch2_inode_write(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *); + +int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *, enum btree_update_flags); + +static inline int bch2_inode_write(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode) +{ + return bch2_inode_write_flags(trans, iter, inode, 0); +} void bch2_inode_init_early(struct bch_fs *, struct bch_inode_unpacked *); -- cgit v1.2.3 From ce3e9a8a10086b28444cab1431dfc926787ecfcb Mon Sep 17 00:00:00 2001 From: 
Kent Overstreet Date: Thu, 2 Nov 2023 16:54:00 -0400 Subject: bcachefs: .get_parent() should return an error pointer Delete the useless check for inum == 0; we'll return -ENOENT without it, which is what we want. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 6642b88c41a0..a425c7783cda 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1213,9 +1213,6 @@ static struct dentry *bch2_get_parent(struct dentry *child) .inum = inode->ei_inode.bi_dir, }; - if (!parent_inum.inum) - return NULL; - return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum)); } -- cgit v1.2.3 From 5a53f851e6fe0e7cc41e682a4a9e40bb178fb80b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 Nov 2023 11:55:44 -0400 Subject: bcachefs: Fix recovery when forced to use JSET_NO_FLUSH journal entry When we didn't find anything in the journal that we'd like to use, and we're forced to use whatever we can find - that entry will have been a JSET_NO_FLUSH entry with a garbage last_seq value, since it's not normally used. Initialize it to something sane, for bch2_fs_journal_start(). 
Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 9600b8083175..9c30500ce920 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -735,6 +735,13 @@ int bch2_fs_recovery(struct bch_fs *c) if (*i) { last_journal_entry = &(*i)->j; (*i)->ignore = false; + /* + * This was probably a NO_FLUSH entry, + * so last_seq was garbage - but we know + * we're only using a single journal + * entry, set it here: + */ + (*i)->j.last_seq = (*i)->j.seq; break; } } -- cgit v1.2.3 From 01ccee225a373d859eb6e5d42dbe0138a40a7e0a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 Nov 2023 12:06:18 -0400 Subject: bcachefs: Add missing printk newlines This was causing error messages in -tools to not get printed. Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 83bdb4368289..cceedd2bc68d 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -721,7 +721,7 @@ retry: if (opt_defined(*opts, sb)) goto err; - printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s", + printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s\n", path, err.buf); printbuf_reset(&err); @@ -783,7 +783,7 @@ got_super: ret = bch2_sb_validate(sb, &err, READ); if (ret) { - printk(KERN_ERR "bcachefs (%s): error validating superblock: %s", + printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n", path, err.buf); goto err_no_print; } @@ -791,7 +791,7 @@ out: printbuf_exit(&err); return ret; err: - printk(KERN_ERR "bcachefs (%s): error reading superblock: %s", + printk(KERN_ERR "bcachefs (%s): error reading superblock: %s\n", path, err.buf); err_no_print: bch2_free_super(sb); -- cgit v1.2.3 From d3c7727bb9269c7f7a2f17ef76b9e5c9b8cc8863 Mon Sep 17 00:00:00 2001 From: Kent 
Overstreet Date: Fri, 3 Nov 2023 18:30:08 -0400 Subject: bcachefs: rebalance_work btree is not a snapshots btree rebalance_work entries may refer to entries in the extents btree, which is a snapshots btree, or they may also refer to entries in the reflink btree, which is not. Hence rebalance_work keys may use the snapshot field but it's not required to be nonzero - add a new btree flag to reflect this. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 7 ++++--- fs/bcachefs/bkey_methods.c | 23 ++++++++++++++--------- fs/bcachefs/btree_iter.h | 1 + fs/bcachefs/btree_trans_commit.c | 1 + fs/bcachefs/btree_types.h | 11 +++++++++++ 5 files changed, 31 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 5b44598b9df9..a191baa518f0 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -2260,7 +2260,8 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); enum btree_id_flags { BTREE_ID_EXTENTS = BIT(0), BTREE_ID_SNAPSHOTS = BIT(1), - BTREE_ID_DATA = BIT(2), + BTREE_ID_SNAPSHOT_FIELD = BIT(2), + BTREE_ID_DATA = BIT(3), }; #define BCH_BTREE_IDS() \ @@ -2315,12 +2316,12 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_bucket_gens)) \ x(snapshot_trees, 15, 0, \ BIT_ULL(KEY_TYPE_snapshot_tree)) \ - x(deleted_inodes, 16, BTREE_ID_SNAPSHOTS, \ + x(deleted_inodes, 16, BTREE_ID_SNAPSHOT_FIELD, \ BIT_ULL(KEY_TYPE_set)) \ x(logged_ops, 17, 0, \ BIT_ULL(KEY_TYPE_logged_op_truncate)| \ BIT_ULL(KEY_TYPE_logged_op_finsert)) \ - x(rebalance_work, 18, BTREE_ID_SNAPSHOTS, \ + x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \ BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) enum btree_id { diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 2f518d7e1a64..761f5e33b1e6 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -186,15 +186,20 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, if (type != BKEY_TYPE_btree) { enum 
btree_id btree = type - 1; - bkey_fsck_err_on(!btree_type_has_snapshots(btree) && - k.k->p.snapshot, c, err, - bkey_snapshot_nonzero, - "nonzero snapshot"); - - bkey_fsck_err_on(btree_type_has_snapshots(btree) && - !k.k->p.snapshot, c, err, - bkey_snapshot_zero, - "snapshot == 0"); + if (btree_type_has_snapshots(btree)) { + bkey_fsck_err_on(!k.k->p.snapshot, c, err, + bkey_snapshot_zero, + "snapshot == 0"); + } else if (!btree_type_has_snapshot_field(btree)) { + bkey_fsck_err_on(k.k->p.snapshot, c, err, + bkey_snapshot_nonzero, + "nonzero snapshot"); + } else { + /* + * btree uses snapshot field but it's not required to be + * nonzero + */ + } bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, bkey_at_pos_max, diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 5e103f519e62..967cde33d433 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -416,6 +416,7 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, flags |= BTREE_ITER_IS_EXTENTS; if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) && + !btree_type_has_snapshot_field(btree_id) && !btree_type_has_snapshots(btree_id)) flags &= ~BTREE_ITER_ALL_SNAPSHOTS; diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 32693f7c6221..decad7b66c59 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -269,6 +269,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, BUG_ON(i->level != i->path->level); BUG_ON(i->btree_id != i->path->btree_id); EBUG_ON(!i->level && + btree_type_has_snapshots(i->btree_id) && !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) && test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) && i->k->k.p.snapshot && diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 7cc8d6b12161..a685883e5405 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -710,6 +710,17 @@ static inline bool btree_type_has_snapshots(enum btree_id id) 
return (1U << id) & mask; } +static inline bool btree_type_has_snapshot_field(enum btree_id id) +{ + const unsigned mask = 0 +#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOT_FIELD)) << nr) + BCH_BTREE_IDS() +#undef x + ; + + return (1U << id) & mask; +} + static inline bool btree_type_has_ptrs(enum btree_id id) { const unsigned mask = 0 -- cgit v1.2.3 From 9fcdd23b6eea3f04475cd9cfb4497f6e26906061 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 Nov 2023 13:49:31 -0400 Subject: bcachefs: Add a comment for BTREE_INSERT_NOJOURNAL usage BTREE_INSERT_NOJOURNAL is primarily used for a performance optimization related to inode updates and fsync - document it. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 2 +- fs/bcachefs/btree_iter.h | 3 +-- fs/bcachefs/btree_types.h | 2 +- fs/bcachefs/io_write.c | 11 +++++++++++ 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index af98545e0f35..c2adf3fbb0b3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -257,7 +257,7 @@ static void bch2_btree_iter_verify(struct btree_iter *iter) BUG_ON(!(iter->flags & __BTREE_ITER_ALL_SNAPSHOTS) && (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && - !btree_type_has_snapshots(iter->btree_id)); + !btree_type_has_snapshot_field(iter->btree_id)); if (iter->update_path) bch2_btree_path_verify(trans, iter->update_path); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 967cde33d433..85e7cb52f6b6 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -416,8 +416,7 @@ static inline unsigned __bch2_btree_iter_flags(struct btree_trans *trans, flags |= BTREE_ITER_IS_EXTENTS; if (!(flags & __BTREE_ITER_ALL_SNAPSHOTS) && - !btree_type_has_snapshot_field(btree_id) && - !btree_type_has_snapshots(btree_id)) + !btree_type_has_snapshot_field(btree_id)) flags &= ~BTREE_ITER_ALL_SNAPSHOTS; if (!(flags & BTREE_ITER_ALL_SNAPSHOTS) && diff 
--git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index a685883e5405..3ab773005484 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -713,7 +713,7 @@ static inline bool btree_type_has_snapshots(enum btree_id id) static inline bool btree_type_has_snapshot_field(enum btree_id id) { const unsigned mask = 0 -#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOT_FIELD)) << nr) +#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr) BCH_BTREE_IDS() #undef x ; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 613f38436640..4eb0eda723ab 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -202,6 +202,17 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, struct btree_iter iter; struct bkey_i *k; struct bkey_i_inode_v3 *inode; + /* + * Crazy performance optimization: + * Every extent update needs to also update the inode: the inode trigger + * will set bi->journal_seq to the journal sequence number of this + * transaction - for fsync. + * + * But if that's the only reason we're updating the inode (we're not + * updating bi_size or bi_sectors), then we don't need the inode update + * to be journalled - if we crash, the bi_journal_seq update will be + * lost, but that's fine. + */ unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL; int ret; -- cgit v1.2.3 From bf61dcdfc12c3890c7a062cfcd46c443883defc9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 Nov 2023 18:31:42 -0400 Subject: bcachefs: CONFIG_BCACHEFS_DEBUG_TRANSACTIONS no longer defaults to y BCACHEFS_DEBUG_TRANSACTIONS is useful, but it's too expensive to have on by default - and it hasn't been coming up in bug reports. Turn it off by default until we figure out a way to make it cheaper. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index 87dc2e934ad7..c08c2c7d6fbb 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -41,7 +41,6 @@ config BCACHEFS_POSIX_ACL config BCACHEFS_DEBUG_TRANSACTIONS bool "bcachefs runtime info" depends on BCACHEFS_FS - default y help This makes the list of running btree transactions available in debugfs. -- cgit v1.2.3 From 59154f2c66ce5625bc00f3e66af7b71608e991f4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 31 Oct 2023 23:43:47 -0400 Subject: bcachefs: bch2_prt_datetime() Improved, better named version of pr_time(). Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors.c | 5 +---- fs/bcachefs/sb-members.c | 2 +- fs/bcachefs/super-io.c | 2 +- fs/bcachefs/util.c | 18 ++++++++++++++++++ fs/bcachefs/util.h | 21 +-------------------- 5 files changed, 22 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index 3d66f15ae8f5..f0930ab7f036 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -61,7 +61,6 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_errors *e = field_to_type(f, errors); unsigned i, nr = bch2_sb_field_errors_nr_entries(e); - u64 now = ktime_get_real_seconds(); if (out->nr_tabstops <= 1) printbuf_tabstop_push(out, 16); @@ -71,9 +70,7 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, prt_tab(out); prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i])); prt_tab(out); - bch2_pr_time_units(out, (now - le64_to_cpu(e->entries[i].last_error_time)) * - NSEC_PER_SEC); - prt_str(out, " ago"); + bch2_prt_datetime(out, le64_to_cpu(e->entries[i].last_error_time)); prt_newline(out); } } diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index ab5de12eca4a..09d5453707fa 100644 --- a/fs/bcachefs/sb-members.c +++ 
b/fs/bcachefs/sb-members.c @@ -235,7 +235,7 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "Last mount:"); prt_tab(out); if (m.last_mount) - pr_time(out, le64_to_cpu(m.last_mount)); + bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); else prt_printf(out, "(never)"); prt_newline(out); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index cceedd2bc68d..f4cad903f4d6 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1183,7 +1183,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, "Created:"); prt_tab(out); if (sb->time_base_lo) - pr_time(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); + bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); else prt_printf(out, "(not set)"); prt_newline(out); diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 08bac0ba8d0b..84b142fcc3df 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -467,6 +467,24 @@ static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) prt_printf(out, "%s", u->name); } +#ifndef __KERNEL__ +#include <time.h> +void bch2_prt_datetime(struct printbuf *out, time64_t sec) +{ + time_t t = sec; + char buf[64]; + ctime_r(&t, buf); + prt_str(out, buf); +} +#else +void bch2_prt_datetime(struct printbuf *out, time64_t sec) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%ptT", &sec); + prt_u64(out, sec); +} +#endif + #define TABSTOP_SIZE 12 static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 849a37ae497c..2984b57b2958 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -245,26 +245,7 @@ do { \ #define prt_bitflags(...)
bch2_prt_bitflags(__VA_ARGS__) void bch2_pr_time_units(struct printbuf *, u64); - -#ifdef __KERNEL__ -static inline void pr_time(struct printbuf *out, u64 time) -{ - prt_printf(out, "%llu", time); -} -#else -#include <time.h> -static inline void pr_time(struct printbuf *out, u64 _time) -{ - char time_str[64]; - time_t time = _time; - struct tm *tm = localtime(&time); - size_t err = strftime(time_str, sizeof(time_str), "%c", tm); - if (!err) - prt_printf(out, "(formatting error)"); - else - prt_printf(out, "%s", time_str); -} -#endif +void bch2_prt_datetime(struct printbuf *, time64_t); #ifdef __KERNEL__ static inline void uuid_unparse_lower(u8 *uuid, char *out) -- cgit v1.2.3 From 0f0fc312380b93d203be3282b2cbaee7016f2b72 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 31 Oct 2023 23:19:59 -0400 Subject: bcachefs: Move __bch2_members_v2_get_mut to sb-members.h Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 17 ++++++----------- fs/bcachefs/sb-members.h | 11 ++++++++--- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 09d5453707fa..bed0f857fe5b 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -21,19 +21,14 @@ char * const bch2_member_error_strs[] = { /* Code for bch_sb_field_members_v1: */ -static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i) -{ - return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); -} - struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) { - return members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); + return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); } static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) { - struct bch_member ret, *p = members_v2_get_mut(mi, i); + struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); memset(&ret, 0, sizeof(ret)); memcpy(&ret, p, min_t(size_t,
le16_to_cpu(mi->member_bytes), sizeof(ret))); return ret; @@ -75,7 +70,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c) for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); - memmove(dst, members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); + memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); memset(dst + le16_to_cpu(mi->member_bytes), 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); } @@ -118,7 +113,7 @@ int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) - memcpy(members_v1_get_mut(mi1, i), members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); + memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); return 0; } @@ -332,7 +327,7 @@ static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct printbuf *err) { struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); - size_t mi_bytes = (void *) members_v2_get_mut(mi, sb->nr_devices) - + size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - (void *) mi; if (mi_bytes > vstruct_bytes(&mi->field)) { @@ -363,7 +358,7 @@ void bch2_sb_members_from_cpu(struct bch_fs *c) rcu_read_lock(); for_each_member_device_rcu(ca, c, i, NULL) { - struct bch_member *m = members_v2_get_mut(mi, i); + struct bch_member *m = __bch2_members_v2_get_mut(mi, i); for (e = 0; e < BCH_MEMBER_ERROR_NR; e++) m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 1583e80afcbf..03613e3eb8e3 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -4,6 +4,12 @@ extern char * const bch2_member_error_strs[]; +static inline struct bch_member * +__bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i) +{ + return (void *) mi->_members + 
(i * le16_to_cpu(mi->member_bytes)); +} + int bch2_sb_members_v2_init(struct bch_fs *c); int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); @@ -186,11 +192,10 @@ static inline bool bch2_member_exists(struct bch_member *m) return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); } -static inline bool bch2_dev_exists(struct bch_sb *sb, - unsigned dev) +static inline bool bch2_dev_exists(struct bch_sb *sb, unsigned dev) { if (dev < sb->nr_devices) { - struct bch_member m = bch2_sb_member_get(sb, dev); + struct bch_member m = bch2_sb_member_get(sb, dev); return bch2_member_exists(&m); } return false; -- cgit v1.2.3 From d4c8bb69d0208907f98af6a0f4da02778f2d7bdb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 31 Oct 2023 22:35:49 -0400 Subject: bcachefs: Convert bch2_fs_open() to darray Open coded dynamic arrays are deprecated. Signed-off-by: Kent Overstreet --- fs/bcachefs/darray.h | 6 ++++++ fs/bcachefs/super.c | 60 ++++++++++++++++++++++++---------------------------- 2 files changed, 34 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 114f86b45fd5..87b4b2d1ec76 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -69,9 +69,15 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, _ret; \ }) +#define darray_remove_item(_d, _pos) \ + array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data) + #define darray_for_each(_d, _i) \ for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++) +#define darray_for_each_reverse(_d, _i) \ + for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) + #define darray_init(_d) \ do { \ (_d)->data = NULL; \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 1b5c2a1bd68a..24672bb31cbe 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1885,9 +1885,9 @@ found: struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct 
bch_opts opts) { - struct bch_sb_handle *sb = NULL; + DARRAY(struct bch_sb_handle) sbs = { 0 }; struct bch_fs *c = NULL; - unsigned i, best_sb = 0; + struct bch_sb_handle *sb, *best = NULL; struct printbuf errbuf = PRINTBUF; int ret = 0; @@ -1899,49 +1899,46 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; } - sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); - if (!sb) { - ret = -ENOMEM; + ret = darray_make_room(&sbs, nr_devices); + if (ret) goto err; - } - for (i = 0; i < nr_devices; i++) { - ret = bch2_read_super(devices[i], &opts, &sb[i]); + for (unsigned i = 0; i < nr_devices; i++) { + struct bch_sb_handle sb = { NULL }; + + ret = bch2_read_super(devices[i], &opts, &sb); if (ret) goto err; + BUG_ON(darray_push(&sbs, sb)); } - for (i = 1; i < nr_devices; i++) - if (le64_to_cpu(sb[i].sb->seq) > - le64_to_cpu(sb[best_sb].sb->seq)) - best_sb = i; - - i = 0; - while (i < nr_devices) { - if (i != best_sb && - !bch2_dev_exists(sb[best_sb].sb, sb[i].sb->dev_idx)) { - pr_info("%pg has been removed, skipping", sb[i].bdev); - bch2_free_super(&sb[i]); - array_remove_item(sb, nr_devices, i); + darray_for_each(sbs, sb) + if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq)) + best = sb; + + darray_for_each_reverse(sbs, sb) { + if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) { + pr_info("%pg has been removed, skipping", sb->bdev); + bch2_free_super(sb); + darray_remove_item(&sbs, sb); + best -= best > sb; continue; } - ret = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); + ret = bch2_dev_in_fs(best->sb, sb->sb); if (ret) goto err_print; - i++; } - c = bch2_fs_alloc(sb[best_sb].sb, opts); - if (IS_ERR(c)) { - ret = PTR_ERR(c); + c = bch2_fs_alloc(best->sb, opts); + ret = PTR_ERR_OR_ZERO(c); + if (ret) goto err; - } down_write(&c->state_lock); - for (i = 0; i < nr_devices; i++) { - ret = bch2_dev_attach_bdev(c, &sb[i]); + darray_for_each(sbs, sb) { + ret = bch2_dev_attach_bdev(c, sb); if (ret) { 
up_write(&c->state_lock); goto err; @@ -1960,7 +1957,9 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; } out: - kfree(sb); + darray_for_each(sbs, sb) + bch2_free_super(sb); + darray_exit(&sbs); printbuf_exit(&errbuf); module_put(THIS_MODULE); return c; @@ -1970,9 +1969,6 @@ err_print: err: if (!IS_ERR_OR_NULL(c)) bch2_fs_stop(c); - if (sb) - for (i = 0; i < nr_devices; i++) - bch2_free_super(&sb[i]); c = ERR_PTR(ret); goto out; } -- cgit v1.2.3 From 103ffe9aaf85660f40c8b68797a374b80b29b91d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 11:42:48 -0400 Subject: bcachefs: x-macro-ify inode flags enum This lets us use bch2_prt_bitflags to print them out. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 52 ++++++++++++++++++++----------------------- fs/bcachefs/fs-common.c | 2 +- fs/bcachefs/fs-ioctl.c | 4 ++-- fs/bcachefs/fs-ioctl.h | 28 +++++++++++------------ fs/bcachefs/fs.c | 6 ++--- fs/bcachefs/fsck.c | 34 ++++++++++++++-------------- fs/bcachefs/inode.c | 38 +++++++++++++++++++------------ fs/bcachefs/inode.h | 6 ++--- fs/bcachefs/io_write.c | 2 +- 9 files changed, 89 insertions(+), 83 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a191baa518f0..0a750953ff92 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -824,34 +824,30 @@ enum inode_opt_id { Inode_opt_nr, }; -enum { - /* - * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL - * flags) - */ - __BCH_INODE_SYNC = 0, - __BCH_INODE_IMMUTABLE = 1, - __BCH_INODE_APPEND = 2, - __BCH_INODE_NODUMP = 3, - __BCH_INODE_NOATIME = 4, - - __BCH_INODE_I_SIZE_DIRTY = 5, /* obsolete */ - __BCH_INODE_I_SECTORS_DIRTY = 6, /* obsolete */ - __BCH_INODE_UNLINKED = 7, - __BCH_INODE_BACKPTR_UNTRUSTED = 8, - - /* bits 20+ reserved for packed fields below: */ -}; - -#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC) -#define BCH_INODE_IMMUTABLE (1 << 
__BCH_INODE_IMMUTABLE) -#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND) -#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP) -#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) -#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) -#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) -#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED) -#define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED) +#define BCH_INODE_FLAGS() \ + x(sync, 0) \ + x(immutable, 1) \ + x(append, 2) \ + x(nodump, 3) \ + x(noatime, 4) \ + x(i_size_dirty, 5) \ + x(i_sectors_dirty, 6) \ + x(unlinked, 7) \ + x(backptr_untrusted, 8) + +/* bits 20+ reserved for packed fields below: */ + +enum bch_inode_flags { +#define x(t, n) BCH_INODE_##t = 1U << n, + BCH_INODE_FLAGS() +#undef x +}; + +enum __bch_inode_flags { +#define x(t, n) __BCH_INODE_##t = n, + BCH_INODE_FLAGS() +#undef x +}; LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index bb5305441f27..4496cf91a4c1 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -51,7 +51,7 @@ int bch2_create_trans(struct btree_trans *trans, bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); if (flags & BCH_CREATE_TMPFILE) - new_inode->bi_flags |= BCH_INODE_UNLINKED; + new_inode->bi_flags |= BCH_INODE_unlinked; ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); if (ret) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 6040bd3f0778..5a39bcb597a3 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -45,13 +45,13 @@ static int bch2_inode_flags_set(struct btree_trans *trans, unsigned newflags = s->flags; unsigned oldflags = bi->bi_flags & s->mask; - if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) && + if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) 
&& !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; if (!S_ISREG(bi->bi_mode) && !S_ISDIR(bi->bi_mode) && - (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags) + (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) return -EINVAL; if (s->set_projinherit) { diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index 54a9c21a3b83..d30f9bb056fd 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ -6,28 +6,28 @@ /* bcachefs inode flags -> vfs inode flags: */ static const __maybe_unused unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_SYNC] = S_SYNC, - [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, - [__BCH_INODE_APPEND] = S_APPEND, - [__BCH_INODE_NOATIME] = S_NOATIME, + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, }; /* bcachefs inode flags -> FS_IOC_GETFLAGS: */ static const __maybe_unused unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_SYNC] = FS_SYNC_FL, - [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, - [__BCH_INODE_APPEND] = FS_APPEND_FL, - [__BCH_INODE_NODUMP] = FS_NODUMP_FL, - [__BCH_INODE_NOATIME] = FS_NOATIME_FL, + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, }; /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ static const __maybe_unused unsigned bch_flags_to_xflags[] = { - [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, - [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, - [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, - [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, - [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, + [__BCH_INODE_sync] = FS_XFLAG_SYNC, + [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_append] = FS_XFLAG_APPEND, + [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, + [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; }; diff --git a/fs/bcachefs/fs.c 
b/fs/bcachefs/fs.c index a425c7783cda..8dbc848f25b8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -764,15 +764,15 @@ static int bch2_getattr(struct mnt_idmap *idmap, stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); } - if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) + if (inode->ei_inode.bi_flags & BCH_INODE_immutable) stat->attributes |= STATX_ATTR_IMMUTABLE; stat->attributes_mask |= STATX_ATTR_IMMUTABLE; - if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) + if (inode->ei_inode.bi_flags & BCH_INODE_append) stat->attributes |= STATX_ATTR_APPEND; stat->attributes_mask |= STATX_ATTR_APPEND; - if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) + if (inode->ei_inode.bi_flags & BCH_INODE_nodump) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= STATX_ATTR_NODUMP; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0e470ebd7f10..9f3e9bd3d767 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -854,9 +854,9 @@ static int check_inode(struct btree_trans *trans, BUG_ON(bch2_inode_unpack(k, &u)); if (!full && - !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY| - BCH_INODE_I_SECTORS_DIRTY| - BCH_INODE_UNLINKED))) + !(u.bi_flags & (BCH_INODE_i_size_dirty| + BCH_INODE_i_sectors_dirty| + BCH_INODE_unlinked))) return 0; if (prev->bi_inum != u.bi_inum) @@ -870,7 +870,7 @@ static int check_inode(struct btree_trans *trans, return -EINVAL; } - if ((u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED)) && + if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { struct bpos new_min_pos; @@ -878,7 +878,7 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; - u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED; + u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; ret = __write_inode(trans, &u, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck updating inode"); @@ -890,7 +890,7 @@ static int check_inode(struct btree_trans *trans, 
return 0; } - if (u.bi_flags & BCH_INODE_UNLINKED && + if (u.bi_flags & BCH_INODE_unlinked && (!c->sb.clean || fsck_err(c, inode_unlinked_but_clean, "filesystem marked clean, but inode %llu unlinked", @@ -903,7 +903,7 @@ static int check_inode(struct btree_trans *trans, return ret; } - if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY && + if (u.bi_flags & BCH_INODE_i_size_dirty && (!c->sb.clean || fsck_err(c, inode_i_size_dirty_but_clean, "filesystem marked clean, but inode %llu has i_size dirty", @@ -930,13 +930,13 @@ static int check_inode(struct btree_trans *trans, * We truncated without our normal sector accounting hook, just * make sure we recalculate it: */ - u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY; + u.bi_flags |= BCH_INODE_i_sectors_dirty; - u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; + u.bi_flags &= ~BCH_INODE_i_size_dirty; do_update = true; } - if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY && + if (u.bi_flags & BCH_INODE_i_sectors_dirty && (!c->sb.clean || fsck_err(c, inode_i_sectors_dirty_but_clean, "filesystem marked clean, but inode %llu has i_sectors dirty", @@ -953,14 +953,14 @@ static int check_inode(struct btree_trans *trans, } u.bi_sectors = sectors; - u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY; + u.bi_flags &= ~BCH_INODE_i_sectors_dirty; do_update = true; } - if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) { + if (u.bi_flags & BCH_INODE_backptr_untrusted) { u.bi_dir = 0; u.bi_dir_offset = 0; - u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED; + u.bi_flags &= ~BCH_INODE_backptr_untrusted; do_update = true; } @@ -1065,7 +1065,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) return -BCH_ERR_internal_fsck_err; } - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), c, inode_i_sectors_wrong, "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", w->last_pos.inode, i->snapshot, @@ -1405,7 +1405,7 @@ static int check_extent(struct btree_trans *trans, 
struct btree_iter *iter, continue; if (k.k->type != KEY_TYPE_whiteout) { - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && !bkey_extent_is_reservation(k), c, extent_past_end_of_inode, @@ -1588,7 +1588,7 @@ static int check_dirent_target(struct btree_trans *trans, "inode %llu type %s has multiple links but i_nlink 0", target->bi_inum, bch2_d_types[d.v->d_type])) { target->bi_nlink++; - target->bi_flags &= ~BCH_INODE_UNLINKED; + target->bi_flags &= ~BCH_INODE_unlinked; ret = __write_inode(trans, target, target_snapshot); if (ret) @@ -2160,7 +2160,7 @@ int bch2_check_directory_structure(struct bch_fs *c) break; } - if (u.bi_flags & BCH_INODE_UNLINKED) + if (u.bi_flags & BCH_INODE_unlinked) continue; ret = check_path(trans, &path, &u, iter.pos.snapshot); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 789f8958f685..def77f2d8802 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -20,13 +20,18 @@ #include -const char * const bch2_inode_opts[] = { #define x(name, ...) 
#name, +const char * const bch2_inode_opts[] = { BCH_INODE_OPTS() -#undef x NULL, }; +static const char * const bch2_inode_flag_strs[] = { + BCH_INODE_FLAGS() + NULL +}; +#undef x + static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; static int inode_decode_field(const u8 *in, const u8 *end, @@ -426,7 +431,7 @@ static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct prin inode_compression_type_invalid, "invalid compression opt %u", unpacked.bi_compression - 1); - bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_UNLINKED) && + bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && unpacked.bi_nlink != 0, c, err, inode_unlinked_but_nlink_nonzero, "flagged as unlinked but bi_nlink != 0"); @@ -500,15 +505,20 @@ fsck_err: static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) { - prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu", - inode->bi_mode, inode->bi_flags, + prt_printf(out, "mode=%o ", inode->bi_mode); + + prt_str(out, "flags="); + prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1)); + prt_printf(out, " (%x)", inode->bi_flags); + + prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu", inode->bi_journal_seq, inode->bi_size, inode->bi_sectors, inode->bi_version); #define x(_name, _bits) \ - prt_printf(out, " "#_name " %llu", (u64) inode->_name); + prt_printf(out, " "#_name "=%llu", (u64) inode->_name); BCH_INODE_FIELDS_v3() #undef x } @@ -547,7 +557,7 @@ static inline u64 bkey_inode_flags(struct bkey_s_c k) static inline bool bkey_is_deleted_inode(struct bkey_s_c k) { - return bkey_inode_flags(k) & BCH_INODE_UNLINKED; + return bkey_inode_flags(k) & BCH_INODE_unlinked; } int bch2_trans_mark_inode(struct btree_trans *trans, @@ -928,8 +938,8 @@ int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) { - if (bi->bi_flags & 
BCH_INODE_UNLINKED) - bi->bi_flags &= ~BCH_INODE_UNLINKED; + if (bi->bi_flags & BCH_INODE_unlinked) + bi->bi_flags &= ~BCH_INODE_unlinked; else { if (bi->bi_nlink == U32_MAX) return -EINVAL; @@ -942,13 +952,13 @@ int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi) { - if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_UNLINKED)) { + if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) { bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero", bi->bi_inum); return; } - if (bi->bi_flags & BCH_INODE_UNLINKED) { + if (bi->bi_flags & BCH_INODE_unlinked) { bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum); return; } @@ -956,7 +966,7 @@ void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked * if (bi->bi_nlink) bi->bi_nlink--; else - bi->bi_flags |= BCH_INODE_UNLINKED; + bi->bi_flags |= BCH_INODE_unlinked; } struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode) @@ -1089,7 +1099,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, pos.offset, pos.snapshot)) goto delete; - if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c, + if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c, deleted_inode_not_unlinked, "non-deleted inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) @@ -1111,7 +1121,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (ret) goto out; - inode.bi_flags &= ~BCH_INODE_UNLINKED; + inode.bi_flags &= ~BCH_INODE_unlinked; ret = bch2_inode_write_flags(trans, &inode_iter, &inode, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index b09fda452d6f..88818a332b1e 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -186,7 +186,7 @@ static inline unsigned nlink_bias(umode_t mode) static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi) { - return bi->bi_flags & 
BCH_INODE_UNLINKED + return bi->bi_flags & BCH_INODE_unlinked ? 0 : bi->bi_nlink + nlink_bias(bi->bi_mode); } @@ -196,10 +196,10 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, { if (nlink) { bi->bi_nlink = nlink - nlink_bias(bi->bi_mode); - bi->bi_flags &= ~BCH_INODE_UNLINKED; + bi->bi_flags &= ~BCH_INODE_unlinked; } else { bi->bi_nlink = 0; - bi->bi_flags |= BCH_INODE_UNLINKED; + bi->bi_flags |= BCH_INODE_unlinked; } } diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 4eb0eda723ab..f02b3f7d26a0 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -234,7 +234,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, inode = bkey_i_to_inode_v3(k); - if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_I_SIZE_DIRTY) && + if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) && new_i_size > le64_to_cpu(inode->v.bi_size)) { inode->v.bi_size = cpu_to_le64(new_i_size); inode_update_flags = 0; -- cgit v1.2.3 From a8958a1a95b28e16dbca0654eeb6aa458bb1d3b0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 19:33:48 -0400 Subject: bcachefs: bkey_copy() is no longer a macro Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey.h | 22 +++++++++------------- fs/bcachefs/bkey_sort.c | 6 +++--- fs/bcachefs/btree_io.c | 4 ++-- fs/bcachefs/btree_update_interior.h | 2 +- 4 files changed, 15 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 518450209236..831be01809f2 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -92,19 +92,15 @@ enum bkey_lr_packed { #define bkey_lr_packed(_l, _r) \ ((_l)->format + ((_r)->format << 1)) -#define bkey_copy(_dst, _src) \ -do { \ - BUILD_BUG_ON(!type_is(_dst, struct bkey_i *) && \ - !type_is(_dst, struct bkey_packed *)); \ - BUILD_BUG_ON(!type_is(_src, struct bkey_i *) && \ - !type_is(_src, struct bkey_packed *)); \ - EBUG_ON((u64 *) (_dst) > (u64 *) (_src) && \ - (u64 *) (_dst) < 
(u64 *) (_src) + \ - ((struct bkey *) (_src))->u64s); \ - \ - memcpy_u64s_small((_dst), (_src), \ - ((struct bkey *) (_src))->u64s); \ -} while (0) +static inline void bkey_p_copy(struct bkey_packed *dst, const struct bkey_packed *src) +{ + memcpy_u64s_small(dst, src, src->u64s); +} + +static inline void bkey_copy(struct bkey_i *dst, const struct bkey_i *src) +{ + memcpy_u64s_small(dst, src, src->k.u64s); +} struct btree; diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index b9aa027c881b..bcca9e76a0b4 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -106,7 +106,7 @@ bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, while ((k = sort_iter_peek(iter))) { if (!bkey_deleted(k) && !should_drop_next_key(iter)) { - bkey_copy(out, k); + bkey_p_copy(out, k); btree_keys_account_key_add(&nr, 0, out); out = bkey_p_next(out); } @@ -137,7 +137,7 @@ bch2_sort_repack(struct bset *dst, struct btree *src, continue; if (!transform) - bkey_copy(out, in); + bkey_p_copy(out, in); else if (bch2_bkey_transform(out_f, out, bkey_packed(in) ? 
in_f : &bch2_bkey_format_current, in)) out->format = KEY_FORMAT_LOCAL_BTREE; @@ -191,7 +191,7 @@ unsigned bch2_sort_keys(struct bkey_packed *dst, memcpy_u64s_small(out, in, bkeyp_key_u64s(f, in)); set_bkeyp_val_u64s(f, out, 0); } else { - bkey_copy(out, in); + bkey_p_copy(out, in); } out->needs_whiteout |= needs_whiteout; out = bkey_p_next(out); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 4d2d6f93500d..37d896edb06e 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -184,7 +184,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) k = new_whiteouts; while (ptrs != ptrs_end) { - bkey_copy(k, *ptrs); + bkey_p_copy(k, *ptrs); k = bkey_p_next(k); ptrs++; } @@ -260,7 +260,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) n = bkey_p_next(k); if (!bkey_deleted(k)) { - bkey_copy(out, k); + bkey_p_copy(out, k); out = bkey_p_next(out); } else { BUG_ON(k->needs_whiteout); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index d92b3cf5f5e0..c2ffeb30884d 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -303,7 +303,7 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b, k.needs_whiteout = true; b->whiteout_u64s += k.u64s; - bkey_copy(unwritten_whiteouts_start(c, b), &k); + bkey_p_copy(unwritten_whiteouts_start(c, b), &k); } /* -- cgit v1.2.3 From a973de85e3976f3418f35cf82112190fac2eeddb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 Nov 2023 11:40:32 -0400 Subject: bcachefs: Replace ERANGE with private error codes We avoid using standard error codes: private, per-callsite error codes make debugging easier. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 2 ++ fs/bcachefs/opts.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 2a11f32cf30a..68a1a96bb7ca 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -3,6 +3,8 @@ #define _BCACHEFS_ERRCODE_H #define BCH_ERRCODES() \ + x(ERANGE, ERANGE_option_too_small) \ + x(ERANGE, ERANGE_option_too_big) \ x(ENOMEM, ENOMEM_stripe_buf) \ x(ENOMEM, ENOMEM_replicas_table) \ x(ENOMEM, ENOMEM_cpu_replicas) \ diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 4ad5880664b0..8dd4046cca41 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -265,14 +265,14 @@ int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err) if (err) prt_printf(err, "%s: too small (min %llu)", opt->attr.name, opt->min); - return -ERANGE; + return -BCH_ERR_ERANGE_option_too_small; } if (opt->max && v >= opt->max) { if (err) prt_printf(err, "%s: too big (max %llu)", opt->attr.name, opt->max); - return -ERANGE; + return -BCH_ERR_ERANGE_option_too_big; } if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) { -- cgit v1.2.3 From 80396a47490936f73729548310ad60e9f5df61c9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 21:01:25 -0400 Subject: bcachefs: Break up bch2_journal_write() Split up bch2_journal_write() to simplify locking: - bch2_journal_write_pick_flush(), which needs j->lock - bch2_journal_write_prep, which operates on the journal buffer to be written and will need the upcoming buf_lock for synchronization with the btree write buffer flush path Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 163 ++++++++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 71 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 65878542940d..392e90d4d4fb 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ 
-1724,68 +1724,18 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); } -void bch2_journal_write(struct closure *cl) +static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { - struct journal *j = container_of(cl, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_dev *ca; - struct journal_buf *w = journal_last_unwritten_buf(j); - struct bch_replicas_padded replicas; struct jset_entry *start, *end; struct jset *jset; - struct bio *bio; - struct printbuf journal_debug_buf = PRINTBUF; + unsigned sectors, bytes, u64s; bool validate_before_checksum = false; - unsigned i, sectors, bytes, u64s, nr_rw_members = 0; int ret; - BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); - journal_buf_realloc(j, w); jset = w->data; - j->write_start_time = local_clock(); - - spin_lock(&j->lock); - - /* - * If the journal is in an error state - we did an emergency shutdown - - * we prefer to continue doing journal writes. We just mark them as - * noflush so they'll never be used, but they'll still be visible by the - * list_journal tool - this helps in debugging. - * - * There's a caveat: the first journal write after marking the - * superblock dirty must always be a flush write, because on startup - * from a clean shutdown we didn't necessarily read the journal and the - * new journal write might overwrite whatever was in the journal - * previously - we can't leave the journal without any flush writes in - * it. - * - * So if we're in an error state, and we're still starting up, we don't - * write anything at all. 
- */ - if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) && - (bch2_journal_error(j) || - w->noflush || - (!w->must_flush && - (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && - test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) { - w->noflush = true; - SET_JSET_NO_FLUSH(jset, true); - jset->last_seq = 0; - w->last_seq = 0; - - j->nr_noflush_writes++; - } else if (!bch2_journal_error(j)) { - j->last_flush_write = jiffies; - j->nr_flush_writes++; - clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags); - } else { - spin_unlock(&j->lock); - goto err; - } - spin_unlock(&j->lock); - /* * New btree roots are set by journalling them; when the journal entry * gets written we have to propagate them to c->btree_roots @@ -1816,7 +1766,7 @@ void bch2_journal_write(struct closure *cl) bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)", vstruct_bytes(jset), w->sectors << 9, u64s, w->u64s_reserved, j->entry_u64s_reserved); - goto err; + return -EINVAL; } jset->magic = cpu_to_le64(jset_magic(c)); @@ -1835,37 +1785,115 @@ void bch2_journal_write(struct closure *cl) validate_before_checksum = true; if (validate_before_checksum && - jset_validate(c, NULL, jset, 0, WRITE)) - goto err; + (ret = jset_validate(c, NULL, jset, 0, WRITE))) + return ret; ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset->encrypted_start, vstruct_end(jset) - (void *) jset->encrypted_start); if (bch2_fs_fatal_err_on(ret, c, "error decrypting journal entry: %i", ret)) - goto err; + return ret; jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); if (!validate_before_checksum && - jset_validate(c, NULL, jset, 0, WRITE)) - goto err; + (ret = jset_validate(c, NULL, jset, 0, WRITE))) + return ret; memset((void *) jset + bytes, 0, (sectors << 9) - bytes); + return 0; +} + +static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *w) +{ + struct bch_fs *c = 
container_of(j, struct bch_fs, journal); + int error = bch2_journal_error(j); + + /* + * If the journal is in an error state - we did an emergency shutdown - + * we prefer to continue doing journal writes. We just mark them as + * noflush so they'll never be used, but they'll still be visible by the + * list_journal tool - this helps in debugging. + * + * There's a caveat: the first journal write after marking the + * superblock dirty must always be a flush write, because on startup + * from a clean shutdown we didn't necessarily read the journal and the + * new journal write might overwrite whatever was in the journal + * previously - we can't leave the journal without any flush writes in + * it. + * + * So if we're in an error state, and we're still starting up, we don't + * write anything at all. + */ + if (error && test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags)) + return -EIO; + + if (error || + w->noflush || + (!w->must_flush && + (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) && + test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) { + w->noflush = true; + SET_JSET_NO_FLUSH(w->data, true); + w->data->last_seq = 0; + w->last_seq = 0; + + j->nr_noflush_writes++; + } else { + j->last_flush_write = jiffies; + j->nr_flush_writes++; + clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags); + } + + return 0; +} + +void bch2_journal_write(struct closure *cl) +{ + struct journal *j = container_of(cl, struct journal, io); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct bch_dev *ca; + struct journal_buf *w = journal_last_unwritten_buf(j); + struct bch_replicas_padded replicas; + struct bio *bio; + struct printbuf journal_debug_buf = PRINTBUF; + unsigned i, nr_rw_members = 0; + int ret; + + BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); + + j->write_start_time = local_clock(); -retry_alloc: spin_lock(&j->lock); - ret = journal_write_alloc(j, w); + ret = bch2_journal_write_pick_flush(j, w); + spin_unlock(&j->lock); + if (ret) + goto err; + + 
ret = bch2_journal_write_prep(j, w); + if (ret) + goto err; + + while (1) { + spin_lock(&j->lock); + ret = journal_write_alloc(j, w); + if (!ret || !j->can_discard) + break; - if (ret && j->can_discard) { spin_unlock(&j->lock); bch2_journal_do_discards(j); - goto retry_alloc; } - if (ret) + if (ret) { __bch2_journal_debug_to_text(&journal_debug_buf, j); + spin_unlock(&j->lock); + bch_err(c, "Unable to allocate journal write:\n%s", + journal_debug_buf.buf); + printbuf_exit(&journal_debug_buf); + goto err; + } /* * write is allocated, no longer need to account for it in @@ -1880,13 +1908,6 @@ retry_alloc: bch2_journal_space_available(j); spin_unlock(&j->lock); - if (ret) { - bch_err(c, "Unable to allocate journal write:\n%s", - journal_debug_buf.buf); - printbuf_exit(&journal_debug_buf); - goto err; - } - w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); if (c->opts.nochanges) @@ -1908,7 +1929,7 @@ retry_alloc: if (ret) goto err; - if (!JSET_NO_FLUSH(jset) && w->separate_flush) { + if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { for_each_rw_member(ca, c, i) { percpu_ref_get(&ca->io_ref); -- cgit v1.2.3 From 769b3600495b8a2ea3c2136121800ce6b566a457 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 2 Nov 2023 21:43:26 -0400 Subject: bcachefs: Don't iterate over journal entries just for btree roots Small performance optimization, and a bit of a code cleanup too. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 12 +++------ fs/bcachefs/btree_update_interior.h | 2 +- fs/bcachefs/journal_io.c | 53 +++++++++++++++++-------------------- fs/bcachefs/sb-clean.c | 2 +- 4 files changed, 29 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 89ada89eafe7..39c2db68123b 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2418,23 +2418,17 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry struct jset_entry * bch2_btree_roots_to_journal_entries(struct bch_fs *c, - struct jset_entry *start, - struct jset_entry *end) + struct jset_entry *end, + unsigned long skip) { - struct jset_entry *entry; - unsigned long have = 0; unsigned i; - for (entry = start; entry < end; entry = vstruct_next(entry)) - if (entry->type == BCH_JSET_ENTRY_btree_root) - __set_bit(entry->btree_id, &have); - mutex_lock(&c->btree_root_lock); for (i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); - if (r->alive && !test_bit(i, &have)) { + if (r->alive && !test_bit(i, &skip)) { journal_entry_set(end, BCH_JSET_ENTRY_btree_root, i, r->level, &r->key, r->key.k.u64s); end = vstruct_next(end); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index c2ffeb30884d..4df21512d640 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -325,7 +325,7 @@ bool bch2_btree_interior_updates_flush(struct bch_fs *); void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, - struct jset_entry *, struct jset_entry *); + struct jset_entry *, unsigned long); void bch2_do_pending_node_rewrites(struct bch_fs *); void bch2_free_pending_node_rewrites(struct bch_fs *); diff --git a/fs/bcachefs/journal_io.c 
b/fs/bcachefs/journal_io.c index 392e90d4d4fb..f4bc2cdbfdd7 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1678,9 +1678,15 @@ static void do_journal_write(struct closure *cl) continue_at(cl, journal_write_done, c->io_complete_wq); } -static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset) +static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { - struct jset_entry *i, *next, *prev = NULL; + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct jset_entry *start, *end, *i, *next, *prev = NULL; + struct jset *jset = w->data; + unsigned sectors, bytes, u64s; + bool validate_before_checksum = false; + unsigned long btree_roots_have = 0; + int ret; /* * Simple compaction, dropping empty jset_entries (from journal @@ -1697,8 +1703,20 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset if (!u64s) continue; - if (i->type == BCH_JSET_ENTRY_btree_root) + /* + * New btree roots are set by journalling them; when the journal + * entry gets written we have to propagate them to + * c->btree_roots + * + * But, every journal entry we write has to contain all the + * btree roots (at least for now); so after we copy btree roots + * to c->btree_roots we have to get any missing btree roots and + * add them to this journal entry: + */ + if (i->type == BCH_JSET_ENTRY_btree_root) { bch2_journal_entry_to_btree_root(c, i); + __set_bit(i->btree_id, &btree_roots_have); + } /* Can we merge with previous entry? */ if (prev && @@ -1722,35 +1740,10 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset prev = prev ? 
vstruct_next(prev) : jset->start; jset->u64s = cpu_to_le32((u64 *) prev - jset->_data); -} - -static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) -{ - struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct jset_entry *start, *end; - struct jset *jset; - unsigned sectors, bytes, u64s; - bool validate_before_checksum = false; - int ret; - - journal_buf_realloc(j, w); - jset = w->data; - - /* - * New btree roots are set by journalling them; when the journal entry - * gets written we have to propagate them to c->btree_roots - * - * But, every journal entry we write has to contain all the btree roots - * (at least for now); so after we copy btree roots to c->btree_roots we - * have to get any missing btree roots and add them to this journal - * entry: - */ - - bch2_journal_entries_postprocess(c, jset); start = end = vstruct_last(jset); - end = bch2_btree_roots_to_journal_entries(c, jset->start, end); + end = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have); bch2_journal_super_entries_add_common(c, &end, le64_to_cpu(jset->seq)); @@ -1872,6 +1865,8 @@ void bch2_journal_write(struct closure *cl) if (ret) goto err; + journal_buf_realloc(j, w); + ret = bch2_journal_write_prep(j, w); if (ret) goto err; diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 9b6cc86d264a..e151ada1c8bd 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -376,7 +376,7 @@ void bch2_fs_mark_clean(struct bch_fs *c) entry = sb_clean->start; bch2_journal_super_entries_add_common(c, &entry, 0); - entry = bch2_btree_roots_to_journal_entries(c, entry, entry); + entry = bch2_btree_roots_to_journal_entries(c, entry, 0); BUG_ON((void *) entry > vstruct_end(&sb_clean->field)); memset(entry, 0, -- cgit v1.2.3 From da4aa3b00123b8a588d23482993751e88bbaa324 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 3 Nov 2023 23:56:14 -0400 Subject: bcachefs: bch2_stripe_to_text() now prints ptr gens Signed-off-by: Kent Overstreet --- 
fs/bcachefs/ec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 5da0e7a69323..62a3ccd71dc4 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -150,6 +150,7 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset); if (i < nr_data) prt_printf(out, "#%u", stripe_blockcount_get(s, i)); + prt_printf(out, " gen %u", ptr->gen); if (ptr_stale(ca, ptr)) prt_printf(out, " stale"); } -- cgit v1.2.3 From aa982665887590a9443f12323fdf508a22d8c86f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 Nov 2023 00:25:52 -0400 Subject: bcachefs: bch2_ec_read_extent() now takes btree_trans We're not supposed to have more than one btree_trans at a time in a given thread - that causes recursive locking deadlocks. Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 10 +++------- fs/bcachefs/ec.h | 2 +- fs/bcachefs/io_read.c | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 62a3ccd71dc4..ee6416b1c576 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -476,14 +476,10 @@ err: return ret; } -static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) -{ - return bch2_trans_run(c, get_stripe_key_trans(trans, idx, stripe)); -} - /* recovery read path: */ -int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) +int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio) { + struct bch_fs *c = trans->c; struct ec_stripe_buf *buf; struct closure cl; struct bch_stripe *v; @@ -498,7 +494,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) if (!buf) return -BCH_ERR_ENOMEM_ec_read_extent; - ret = get_stripe_key(c, rbio->pick.ec.idx, buf); + ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf)); if (ret) { bch_err_ratelimited(c, "error doing reconstruct read: error %i looking up 
stripe", ret); diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 61c67aa0aa49..7d0237c9819f 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -199,7 +199,7 @@ struct ec_stripe_head { struct ec_stripe_new *s; }; -int bch2_ec_read_extent(struct bch_fs *, struct bch_read_bio *); +int bch2_ec_read_extent(struct btree_trans *, struct bch_read_bio *); void *bch2_writepoint_ec_buf(struct bch_fs *, struct write_point *); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index ae36fc485f5f..a56ed553dc15 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -1025,7 +1025,7 @@ get_bio: trans->notrace_relock_fail = true; } else { /* Attempting reconstruct read: */ - if (bch2_ec_read_extent(c, rbio)) { + if (bch2_ec_read_extent(trans, rbio)) { bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); goto out; } -- cgit v1.2.3 From daba90f2da9d1a32b94552207f8dad5adb646a5c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 Nov 2023 21:22:34 -0400 Subject: bcachefs: kill thing_it_points_to arg to backpointer_not_found() This can be calculated locally. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 5ed96dddae08..2cf93ad2f96d 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -220,8 +220,7 @@ out: static void backpointer_not_found(struct btree_trans *trans, struct bpos bp_pos, struct bch_backpointer bp, - struct bkey_s_c k, - const char *thing_it_points_to) + struct bkey_s_c k) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; @@ -231,7 +230,7 @@ static void backpointer_not_found(struct btree_trans *trans, return; prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", - thing_it_points_to); + bp.level ? 
"btree node" : "extent"); prt_printf(&buf, "bucket: "); bch2_bpos_to_text(&buf, bucket); prt_printf(&buf, "\n "); @@ -303,7 +302,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, return bkey_s_c_null; } - backpointer_not_found(trans, bp_pos, bp, k, "extent"); + backpointer_not_found(trans, bp_pos, bp, k); } return bkey_s_c_null; @@ -338,8 +337,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, if (b && btree_node_will_make_reachable(b)) { b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); } else { - backpointer_not_found(trans, bp_pos, bp, - bkey_i_to_s_c(&b->key), "btree node"); + backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); b = NULL; } err: @@ -797,7 +795,8 @@ static int check_one_backpointer(struct btree_trans *trans, if (fsck_err_on(!k.k, c, backpointer_to_missing_ptr, - "backpointer for missing extent\n %s", + "backpointer for missing %s\n %s", + bp.v->level ? "btree node" : "extent", (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); goto out; -- cgit v1.2.3 From 853960d00b4b3df96acbf8e18980896f9115c45c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 Nov 2023 20:22:56 -0400 Subject: bcachefs: Simplify, fix bch2_backpointer_get_key() - backpointer_not_found() checks backpointers_no_use_write_buffer, no need to do it in backpointer_get_key(). - always use backpointer_get_node() for pointers to nodes: backpointer_get_key() was sometimes returning the key from the root node unlocked. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 77 ++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 2cf93ad2f96d..ef02c9bb0354 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -5,6 +5,7 @@ #include "backpointers.h" #include "btree_cache.h" #include "btree_update.h" +#include "btree_update_interior.h" #include "btree_write_buffer.h" #include "error.h" @@ -226,6 +227,11 @@ static void backpointer_not_found(struct btree_trans *trans, struct printbuf buf = PRINTBUF; struct bpos bucket = bp_pos_to_bucket(c, bp_pos); + /* + * If we're using the btree write buffer, the backpointer we were + * looking at may have already been deleted - failure to find what it + * pointed to is not an error: + */ if (likely(!bch2_backpointers_no_use_write_buffer)) return; @@ -256,56 +262,37 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, struct bch_backpointer bp, unsigned iter_flags) { - struct bch_fs *c = trans->c; - struct btree_root *r = bch2_btree_id_root(c, bp.btree_id); - struct bpos bucket = bp_pos_to_bucket(c, bp_pos); - struct bkey_s_c k; - - bch2_trans_node_iter_init(trans, iter, - bp.btree_id, - bp.pos, - 0, - min(bp.level, r->level), - iter_flags); - k = bch2_btree_iter_peek_slot(iter); - if (bkey_err(k)) { - bch2_trans_iter_exit(trans, iter); - return k; - } - - if (bp.level == r->level + 1) - k = bkey_i_to_s_c(&r->key); - - if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) - return k; - - bch2_trans_iter_exit(trans, iter); + if (likely(!bp.level)) { + struct bch_fs *c = trans->c; + struct bpos bucket = bp_pos_to_bucket(c, bp_pos); + struct bkey_s_c k; + + bch2_trans_node_iter_init(trans, iter, + bp.btree_id, + bp.pos, + 0, 0, + iter_flags); + k = bch2_btree_iter_peek_slot(iter); + if (bkey_err(k)) { + bch2_trans_iter_exit(trans, iter); + return k; + } 
- if (unlikely(bch2_backpointers_no_use_write_buffer)) { - if (bp.level) { - struct btree *b; + if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) + return k; - /* - * If a backpointer for a btree node wasn't found, it may be - * because it was overwritten by a new btree node that hasn't - * been written out yet - backpointer_get_node() checks for - * this: - */ - b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); - if (!IS_ERR_OR_NULL(b)) - return bkey_i_to_s_c(&b->key); + bch2_trans_iter_exit(trans, iter); + backpointer_not_found(trans, bp_pos, bp, k); + return bkey_s_c_null; + } else { + struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); + if (IS_ERR_OR_NULL(b)) { bch2_trans_iter_exit(trans, iter); - - if (IS_ERR(b)) - return bkey_s_c_err(PTR_ERR(b)); - return bkey_s_c_null; + return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; } - - backpointer_not_found(trans, bp_pos, bp, k); + return bkey_i_to_s_c(&b->key); } - - return bkey_s_c_null; } struct btree *bch2_backpointer_get_node(struct btree_trans *trans, @@ -329,6 +316,8 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, if (IS_ERR(b)) goto err; + BUG_ON(b->c.level != bp.level - 1); + if (b && extent_matches_bp(c, bp.btree_id, bp.level, bkey_i_to_s_c(&b->key), bucket, bp)) -- cgit v1.2.3 From c7046ed0cf9bb33599aa7e72e7b67bba4be42d64 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 4 Nov 2023 22:34:37 -0400 Subject: bcachefs: Improve stripe checksum error message We now include the name of the device in the error message - and also increment the number of checksum errors on that device. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index ee6416b1c576..875f7c5a6fca 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -304,16 +304,21 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct bch_csum got = ec_block_checksum(buf, i, offset); if (bch2_crc_cmp(want, got)) { - struct printbuf buf2 = PRINTBUF; + struct printbuf err = PRINTBUF; + struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev); + + prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n", + want.hi, want.lo, + got.hi, got.lo, + bch2_csum_types[v->csum_type]); + prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); + bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); + bch_err_ratelimited(ca, "%s", err.buf); + printbuf_exit(&err); - bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&buf->key)); - - bch_err_ratelimited(c, - "stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx\n%s", - (void *) _RET_IP_, i, j, v->csum_type, - want.lo, got.lo, buf2.buf); - printbuf_exit(&buf2); clear_bit(i, buf->valid); + + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); break; } -- cgit v1.2.3