diff options
Diffstat (limited to 'fs/bcachefs/btree_io.c')
-rw-r--r-- | fs/bcachefs/btree_io.c | 117 |
1 files changed, 62 insertions, 55 deletions
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index debb0edc3455..cbf8f5d90602 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -23,6 +23,18 @@ #include <linux/sched/mm.h> +static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) +{ + prt_printf(out, "btree=%s l=%u seq %llux\n", + bch2_btree_id_str(BTREE_NODE_ID(bn)), + (unsigned) BTREE_NODE_LEVEL(bn), bn->keys.seq); + prt_str(out, "min: "); + bch2_bpos_to_text(out, bn->min_key); + prt_newline(out); + prt_str(out, "max: "); + bch2_bpos_to_text(out, bn->max_key); +} + void bch2_btree_node_io_unlock(struct btree *b) { EBUG_ON(!btree_node_write_in_flight(b)); @@ -217,7 +229,6 @@ static bool should_compact_bset(struct btree *b, struct bset_tree *t, static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) { - struct bset_tree *t; bool ret = false; for_each_bset(b, t) { @@ -288,8 +299,7 @@ bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, static void btree_node_sort(struct bch_fs *c, struct btree *b, unsigned start_idx, - unsigned end_idx, - bool filter_whiteouts) + unsigned end_idx) { struct btree_node *out; struct sort_iter_stack sort_iter; @@ -320,7 +330,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, start_time = local_clock(); - u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter, filter_whiteouts); + u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter); out->keys.u64s = cpu_to_le16(u64s); @@ -426,13 +436,12 @@ static bool btree_node_compact(struct bch_fs *c, struct btree *b) break; if (b->nsets - unwritten_idx > 1) { - btree_node_sort(c, b, unwritten_idx, - b->nsets, false); + btree_node_sort(c, b, unwritten_idx, b->nsets); ret = true; } if (unwritten_idx > 1) { - btree_node_sort(c, b, 0, unwritten_idx, false); + btree_node_sort(c, b, 0, unwritten_idx); ret = true; } @@ -441,8 +450,6 @@ static bool btree_node_compact(struct bch_fs *c, struct btree *b) void bch2_btree_build_aux_trees(struct btree *b) { - struct bset_tree *t; - for_each_bset(b, t) bch2_bset_build_aux_tree(b, t, !bset_written(b, bset(b, t)) && @@ -524,7 +531,9 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_printf(out, "at btree "); bch2_btree_pos_to_text(out, c, b); - prt_printf(out, "\n node offset %u/%u", + printbuf_indent_add(out, 2); + + prt_printf(out, "\nnode offset %u/%u", b->written, btree_ptr_sectors_written(&b->key)); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); @@ -543,6 +552,7 @@ static int __btree_err(int ret, const char *fmt, ...) { struct printbuf out = PRINTBUF; + bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; va_list args; btree_err_msg(&out, c, ca, b, i, b->written, write); @@ -564,12 +574,14 @@ static int __btree_err(int ret, if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = -BCH_ERR_btree_node_read_err_bad_node; - if (ret != -BCH_ERR_btree_node_read_err_fixable) + if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) bch2_sb_error_count(c, err_type); switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: - ret = bch2_fsck_err(c, FSCK_CAN_FIX, err_type, "%s", out.buf); + ret = !silent + ? bch2_fsck_err(c, FSCK_CAN_FIX, err_type, "%s", out.buf) + : -BCH_ERR_fsck_fix; if (ret != -BCH_ERR_fsck_fix && ret != -BCH_ERR_fsck_ignore) goto fsck_err; @@ -577,14 +589,17 @@ static int __btree_err(int ret, break; case -BCH_ERR_btree_node_read_err_want_retry: case -BCH_ERR_btree_node_read_err_must_retry: - bch2_print_string_as_lines(KERN_ERR, out.buf); + if (!silent) + bch2_print_string_as_lines(KERN_ERR, out.buf); break; case -BCH_ERR_btree_node_read_err_bad_node: - bch2_print_string_as_lines(KERN_ERR, out.buf); + if (!silent) + bch2_print_string_as_lines(KERN_ERR, out.buf); ret = bch2_topology_error(c); break; case -BCH_ERR_btree_node_read_err_incompatible: - bch2_print_string_as_lines(KERN_ERR, out.buf); + if (!silent) + bch2_print_string_as_lines(KERN_ERR, out.buf); ret = -BCH_ERR_fsck_errors_not_fixed; break; default: @@ -619,8 +634,6 @@ fsck_err: __cold void bch2_btree_node_drop_keys_outside_node(struct btree *b) { - struct bset_tree *t; - for_each_bset(b, t) { struct bset *i = bset(b, t); struct bkey_packed *k; @@ -1021,18 +1034,19 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, btree_node_bad_seq, - "got wrong btree node (want %llx got %llx)\n" - "got btree %s level %llu pos %s", - bp->seq, b->data->keys.seq, - bch2_btree_id_str(BTREE_NODE_ID(b->data)), - BTREE_NODE_LEVEL(b->data), - buf.buf); + "got wrong btree node: got\n%s", + (printbuf_reset(&buf), + bch2_btree_node_header_to_text(&buf, b->data), + buf.buf)); } else { btree_err_on(!b->data->keys.seq, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, btree_node_bad_seq, - "bad btree header: seq 0"); + "bad btree header: seq 0\n%s", + (printbuf_reset(&buf), + bch2_btree_node_header_to_text(&buf, b->data), + buf.buf)); } while (b->written < (ptr_written ?: btree_sectors(c))) { @@ -1095,7 +1109,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, nonce = btree_nonce(i, b->written << 9); struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); csum_bad = bch2_crc_cmp(bne->csum, csum); - if (csum_bad) + if (ca && csum_bad) bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); btree_err_on(csum_bad, @@ -1249,12 +1263,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); + rcu_read_lock(); bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { - struct bch_dev *ca2 = bch_dev_bkey_exists(c, ptr->dev); + struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - if (ca2->mi.state != BCH_MEMBER_STATE_rw) + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) set_btree_node_need_rewrite(b); } + rcu_read_unlock(); if (!ptr_written) set_btree_node_need_rewrite(b); @@ -1279,8 +1295,8 @@ static void btree_node_read_work(struct work_struct *work) struct btree_read_bio *rb = container_of(work, struct btree_read_bio, work); struct bch_fs *c = rb->c; + struct bch_dev *ca = rb->have_ioref ? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL; struct btree *b = rb->b; - struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; struct printbuf buf = PRINTBUF; @@ -1292,8 +1308,8 @@ static void btree_node_read_work(struct work_struct *work) while (1) { retry = true; bch_info(c, "retrying read"); - ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); - rb->have_ioref = bch2_dev_get_ioref(ca, READ); + ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ); + rb->have_ioref = ca != NULL; bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META); bio->bi_iter.bi_sector = rb->pick.ptr.offset; bio->bi_iter.bi_size = btree_buf_bytes(b); @@ -1307,7 +1323,7 @@ static void btree_node_read_work(struct work_struct *work) start: printbuf_reset(&buf); bch2_btree_pos_to_text(&buf, c, b); - bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, + bch2_dev_io_err_on(ca && bio->bi_status, ca, BCH_MEMBER_ERROR_read, "btree read error %s for %s", bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) @@ -1363,7 +1379,7 @@ static void btree_node_read_endio(struct bio *bio) struct bch_fs *c = rb->c; if (rb->have_ioref) { - struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); + struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); bch2_latency_acct(ca, rb->start_time, READ); } @@ -1560,7 +1576,7 @@ static void btree_node_read_all_replicas_endio(struct bio *bio) struct btree_node_read_all *ra = rb->ra; if (rb->have_ioref) { - struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); + struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); bch2_latency_acct(ca, rb->start_time, READ); } @@ -1602,14 +1618,14 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool i = 0; bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) { - struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev); + struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ); struct btree_read_bio *rb = container_of(ra->bio[i], struct btree_read_bio, bio); rb->c = c; rb->b = b; rb->ra = ra; rb->start_time = local_clock(); - rb->have_ioref = bch2_dev_get_ioref(ca, READ); + rb->have_ioref = ca != NULL; rb->idx = i; rb->pick = pick; rb->bio.bi_iter.bi_sector = pick.ptr.offset; @@ -1679,7 +1695,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, return; } - ca = bch_dev_bkey_exists(c, pick.ptr.dev); + ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ); bio = bio_alloc_bioset(NULL, buf_pages(b->data, btree_buf_bytes(b)), @@ -1691,7 +1707,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, rb->b = b; rb->ra = NULL; rb->start_time = local_clock(); - rb->have_ioref = bch2_dev_get_ioref(ca, READ); + rb->have_ioref = ca != NULL; rb->pick = pick; INIT_WORK(&rb->work, btree_node_read_work); bio->bi_iter.bi_sector = pick.ptr.offset; @@ -1846,7 +1862,6 @@ static void btree_node_write_work(struct work_struct *work) container_of(work, struct btree_write_bio, work); struct bch_fs *c = wbio->wbio.c; struct btree *b = wbio->wbio.bio.bi_private; - struct bch_extent_ptr *ptr; int ret = 0; btree_bounce_free(c, @@ -1896,13 +1911,14 @@ static void btree_node_write_endio(struct bio *bio) struct btree_write_bio *wb = container_of(orig, struct btree_write_bio, wbio); struct bch_fs *c = wbio->c; struct btree *b = wbio->bio.bi_private; - struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); + struct bch_dev *ca = wbio->have_ioref ? bch2_dev_have_ref(c, wbio->dev) : NULL; unsigned long flags; if (wbio->have_ioref) bch2_latency_acct(ca, wbio->submit_time, WRITE); - if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + if (!ca || + bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, "btree write error: %s", bch2_blk_status_to_str(bio->bi_status)) || bch2_meta_write_fault("btree")) { @@ -1969,7 +1985,6 @@ static void btree_write_submit(struct work_struct *work) void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) { struct btree_write_bio *wbio; - struct bset_tree *t; struct bset *i; struct btree_node *bn = NULL; struct btree_node_entry *bne = NULL; @@ -2095,11 +2110,11 @@ do_write: unwritten_whiteouts_end(b)); SET_BSET_SEPARATE_WHITEOUTS(i, false); - b->whiteout_u64s = 0; - - u64s = bch2_sort_keys(i->start, &sort_iter.iter, false); + u64s = bch2_sort_keys_keep_unwritten_whiteouts(i->start, &sort_iter.iter); le16_add_cpu(&i->u64s, u64s); + b->whiteout_u64s = 0; + BUG_ON(!b->written && i->u64s != b->data->keys.u64s); set_needs_whiteout(i, false); @@ -2226,7 +2241,6 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) { bool invalidated_iter = false; struct btree_node_entry *bne; - struct bset_tree *t; if (!btree_node_just_written(b)) return false; @@ -2249,7 +2263,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) * single bset: */ if (b->nsets > 1) { - btree_node_sort(c, b, 0, b->nsets, true); + btree_node_sort(c, b, 0, b->nsets); invalidated_iter = true; } else { invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); @@ -2346,20 +2360,13 @@ void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c) printbuf_tabstop_push(out, 20); printbuf_tabstop_push(out, 10); - prt_tab(out); - prt_str(out, "nr"); - prt_tab(out); - prt_str(out, "size"); - prt_newline(out); + prt_printf(out, "\tnr\tsize\n"); for (unsigned i = 0; i < BTREE_WRITE_TYPE_NR; i++) { u64 nr = atomic64_read(&c->btree_write_stats[i].nr); u64 bytes = atomic64_read(&c->btree_write_stats[i].bytes); - prt_printf(out, "%s:", bch2_btree_write_types[i]); - prt_tab(out); - prt_u64(out, nr); - prt_tab(out); + prt_printf(out, "%s:\t%llu\t", bch2_btree_write_types[i], nr); prt_human_readable_u64(out, nr ? div64_u64(bytes, nr) : 0); prt_newline(out); } |