diff options
Diffstat (limited to 'drivers/md/bcache/bset.c')
-rw-r--r-- | drivers/md/bcache/bset.c | 289 |
1 files changed, 146 insertions, 143 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 22d1ae72c282..7d388b8bb50e 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -14,22 +14,12 @@ /* Keylists */ -void bch_keylist_copy(struct keylist *dest, struct keylist *src) -{ - *dest = *src; - - if (src->list == src->d) { - size_t n = (uint64_t *) src->top - src->d; - dest->top = (struct bkey *) &dest->d[n]; - dest->list = dest->d; - } -} - int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c) { - unsigned oldsize = (uint64_t *) l->top - l->list; - unsigned newsize = oldsize + 2 + nptrs; - uint64_t *new; + size_t oldsize = bch_keylist_nkeys(l); + size_t newsize = oldsize + 2 + nptrs; + uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p; + uint64_t *new_keys; /* The journalling code doesn't handle the case where the keys to insert * is bigger than an empty write: If we just return -ENOMEM here, @@ -45,24 +35,23 @@ int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c) roundup_pow_of_two(oldsize) == newsize) return 0; - new = krealloc(l->list == l->d ? NULL : l->list, - sizeof(uint64_t) * newsize, GFP_NOIO); + new_keys = krealloc(old_keys, sizeof(uint64_t) * newsize, GFP_NOIO); - if (!new) + if (!new_keys) return -ENOMEM; - if (l->list == l->d) - memcpy(new, l->list, sizeof(uint64_t) * KEYLIST_INLINE); + if (!old_keys) + memcpy(new_keys, l->inline_keys, sizeof(uint64_t) * oldsize); - l->list = new; - l->top = (struct bkey *) (&l->list[oldsize]); + l->keys_p = new_keys; + l->top_p = new_keys + oldsize; return 0; } struct bkey *bch_keylist_pop(struct keylist *l) { - struct bkey *k = l->bottom; + struct bkey *k = l->keys; if (k == l->top) return NULL; @@ -73,21 +62,20 @@ struct bkey *bch_keylist_pop(struct keylist *l) return l->top = k; } -/* Pointer validation */ - -bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) +void bch_keylist_pop_front(struct keylist *l) { - unsigned i; - char buf[80]; + l->top_p -= bkey_u64s(l->keys); - if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))) - goto bad; + memmove(l->keys, + bkey_next(l->keys), + bch_keylist_bytes(l)); +} - if (!level && KEY_SIZE(k) > KEY_OFFSET(k)) - goto bad; +/* Pointer validation */ - if (!KEY_SIZE(k)) - return true; +static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) +{ + unsigned i; for (i = 0; i < KEY_PTRS(k); i++) if (ptr_available(c, k, i)) { @@ -98,13 +86,83 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) if (KEY_SIZE(k) + r > c->sb.bucket_size || bucket < ca->sb.first_bucket || bucket >= ca->sb.nbuckets) - goto bad; + return true; } return false; +} + +bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k) +{ + char buf[80]; + + if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)) + goto bad; + + if (__ptr_invalid(c, k)) + goto bad; + + return false; +bad: + bch_bkey_to_text(buf, sizeof(buf), k); + cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k)); + return true; +} + +bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k) +{ + char buf[80]; + + if (!KEY_SIZE(k)) + return true; + + if (KEY_SIZE(k) > KEY_OFFSET(k)) + goto bad; + + if (__ptr_invalid(c, k)) + goto bad; + + return false; bad: bch_bkey_to_text(buf, sizeof(buf), k); - cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k)); + cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k)); + return true; +} + +static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k, + unsigned ptr) +{ + struct bucket *g = PTR_BUCKET(b->c, k, ptr); + char buf[80]; + + if (mutex_trylock(&b->c->bucket_lock)) { + if (b->level) { + if (KEY_DIRTY(k) || + g->prio != BTREE_PRIO || + (b->c->gc_mark_valid && + GC_MARK(g) != GC_MARK_METADATA)) + goto err; + + } else { + if (g->prio == BTREE_PRIO) + goto err; + + if (KEY_DIRTY(k) && + b->c->gc_mark_valid && + GC_MARK(g) != GC_MARK_DIRTY) + goto err; + } + mutex_unlock(&b->c->bucket_lock); + } + + return false; +err: + mutex_unlock(&b->c->bucket_lock); + bch_bkey_to_text(buf, sizeof(buf), k); + btree_bug(b, +"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", + buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin), + g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); return true; } @@ -118,64 +176,29 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) bch_ptr_invalid(b, k)) return true; - if (KEY_PTRS(k) && PTR_DEV(k, 0) == PTR_CHECK_DEV) - return true; + for (i = 0; i < KEY_PTRS(k); i++) { + if (!ptr_available(b->c, k, i)) + return true; - for (i = 0; i < KEY_PTRS(k); i++) - if (ptr_available(b->c, k, i)) { - g = PTR_BUCKET(b->c, k, i); - stale = ptr_stale(b->c, k, i); + g = PTR_BUCKET(b->c, k, i); + stale = ptr_stale(b->c, k, i); - btree_bug_on(stale > 96, b, - "key too stale: %i, need_gc %u", - stale, b->c->need_gc); + btree_bug_on(stale > 96, b, + "key too stale: %i, need_gc %u", + stale, b->c->need_gc); - btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k), - b, "stale dirty pointer"); + btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k), + b, "stale dirty pointer"); - if (stale) - return true; + if (stale) + return true; -#ifdef CONFIG_BCACHE_EDEBUG - if (!mutex_trylock(&b->c->bucket_lock)) - continue; - - if (b->level) { - if (KEY_DIRTY(k) || - g->prio != BTREE_PRIO || - (b->c->gc_mark_valid && - GC_MARK(g) != GC_MARK_METADATA)) - goto bug; - - } else { - if (g->prio == BTREE_PRIO) - goto bug; - - if (KEY_DIRTY(k) && - b->c->gc_mark_valid && - GC_MARK(g) != GC_MARK_DIRTY) - goto bug; - } - mutex_unlock(&b->c->bucket_lock); -#endif - } + if (expensive_debug_checks(b->c) && + ptr_bad_expensive_checks(b, k, i)) + return true; + } return false; -#ifdef CONFIG_BCACHE_EDEBUG -bug: - mutex_unlock(&b->c->bucket_lock); - - { - char buf[80]; - - bch_bkey_to_text(buf, sizeof(buf), k); - btree_bug(b, -"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", - buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), - g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); - } - return true; -#endif } /* Key/pointer manipulation */ @@ -458,16 +481,8 @@ static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline) static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift) { -#ifdef CONFIG_X86_64 - asm("shrd %[shift],%[high],%[low]" - : [low] "+Rm" (low) - : [high] "R" (high), - [shift] "ci" (shift) - : "cc"); -#else low >>= shift; low |= (high << 1) << (63U - shift); -#endif return low; } @@ -686,7 +701,7 @@ void bch_bset_init_next(struct btree *b) } else get_random_bytes(&i->seq, sizeof(uint64_t)); - i->magic = bset_magic(b->c); + i->magic = bset_magic(&b->c->sb); i->version = 0; i->keys = 0; @@ -824,16 +839,16 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, } else i = bset_search_write_set(b, t, search); -#ifdef CONFIG_BCACHE_EDEBUG - BUG_ON(bset_written(b, t) && - i.l != t->data->start && - bkey_cmp(tree_to_prev_bkey(t, - inorder_to_tree(bkey_to_cacheline(t, i.l), t)), - search) > 0); + if (expensive_debug_checks(b->c)) { + BUG_ON(bset_written(b, t) && + i.l != t->data->start && + bkey_cmp(tree_to_prev_bkey(t, + inorder_to_tree(bkey_to_cacheline(t, i.l), t)), + search) > 0); - BUG_ON(i.r != end(t->data) && - bkey_cmp(i.r, search) <= 0); -#endif + BUG_ON(i.r != end(t->data) && + bkey_cmp(i.r, search) <= 0); + } while (likely(i.l != i.r) && bkey_cmp(i.l, search) <= 0) @@ -844,6 +859,13 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, /* Btree iterator */ +/* + * Returns true if l > r - unless l == r, in which case returns true if l is + * older than r. + * + * Necessary for btree_sort_fixup() - if there are multiple keys that compare + * equal in different sets, we have to process them newest to oldest. + */ static inline bool btree_iter_cmp(struct btree_iter_set l, struct btree_iter_set r) { @@ -867,12 +889,16 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, } struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter, - struct bkey *search, struct bset_tree *start) + struct bkey *search, struct bset_tree *start) { struct bkey *ret = NULL; iter->size = ARRAY_SIZE(iter->data); iter->used = 0; +#ifdef CONFIG_BCACHE_DEBUG + iter->b = b; +#endif + for (; start <= &b->sets[b->nsets]; start++) { ret = bch_bset_search(b, start, search); bch_btree_iter_push(iter, ret, end(start->data)); @@ -887,6 +913,8 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter) struct bkey *ret = NULL; if (!btree_iter_end(iter)) { + bch_btree_iter_next_check(iter); + ret = iter->data->k; iter->data->k = bkey_next(iter->data->k); @@ -916,14 +944,6 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, return ret; } -struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search) -{ - struct btree_iter iter; - - bch_btree_iter_init(b, &iter, search); - return bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); -} - /* Mergesort */ static void sort_key_next(struct btree_iter *iter, @@ -998,7 +1018,6 @@ static void btree_mergesort(struct btree *b, struct bset *out, out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0; pr_debug("sorted %i keys", out->keys); - bch_check_key_order(b, out); } static void __btree_sort(struct btree *b, struct btree_iter *iter, @@ -1029,7 +1048,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, * memcpy() */ - out->magic = bset_magic(b->c); + out->magic = bset_magic(&b->c->sb); out->seq = b->sets[0].data->seq; out->version = b->sets[0].data->version; swap(out, b->sets[0].data); @@ -1050,24 +1069,21 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, if (b->written) bset_build_written_tree(b); - if (!start) { - spin_lock(&b->c->sort_time_lock); + if (!start) bch_time_stats_update(&b->c->sort_time, start_time); - spin_unlock(&b->c->sort_time_lock); - } } void bch_btree_sort_partial(struct btree *b, unsigned start) { - size_t oldsize = 0, order = b->page_order, keys = 0; + size_t order = b->page_order, keys = 0; struct btree_iter iter; + int oldsize = bch_count_data(b); + __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); BUG_ON(b->sets[b->nsets].data == write_block(b) && (b->sets[b->nsets].size || b->nsets)); - if (b->written) - oldsize = bch_count_data(b); if (start) { unsigned i; @@ -1083,7 +1099,7 @@ void bch_btree_sort_partial(struct btree *b, unsigned start) __btree_sort(b, &iter, start, order, false); - EBUG_ON(b->written && bch_count_data(b) != oldsize); + EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); } void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) @@ -1101,9 +1117,7 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) btree_mergesort(b, new->sets->data, &iter, false, true); - spin_lock(&b->c->sort_time_lock); bch_time_stats_update(&b->c->sort_time, start_time); - spin_unlock(&b->c->sort_time_lock); bkey_copy_key(&new->key, &b->key); new->sets->size = 0; @@ -1148,16 +1162,16 @@ out: /* Sysfs stuff */ struct bset_stats { + struct btree_op op; size_t nodes; size_t sets_written, sets_unwritten; size_t bytes_written, bytes_unwritten; size_t floats, failed; }; -static int bch_btree_bset_stats(struct btree *b, struct btree_op *op, - struct bset_stats *stats) +static int btree_bset_stats(struct btree_op *op, struct btree *b) { - struct bkey *k; + struct bset_stats *stats = container_of(op, struct bset_stats, op); unsigned i; stats->nodes++; @@ -1182,30 +1196,19 @@ static int bch_btree_bset_stats(struct btree *b, struct btree_op *op, } } - if (b->level) { - struct btree_iter iter; - - for_each_key_filter(b, k, &iter, bch_ptr_bad) { - int ret = btree(bset_stats, k, b, op, stats); - if (ret) - return ret; - } - } - - return 0; + return MAP_CONTINUE; } int bch_bset_print_stats(struct cache_set *c, char *buf) { - struct btree_op op; struct bset_stats t; int ret; - bch_btree_op_init_stack(&op); memset(&t, 0, sizeof(struct bset_stats)); + bch_btree_op_init(&t.op, -1); - ret = btree_root(bset_stats, c, &op, &t); - if (ret) + ret = bch_btree_map_nodes(&t.op, c, &ZERO_KEY, btree_bset_stats); + if (ret < 0) return ret; return snprintf(buf, PAGE_SIZE, |