summaryrefslogtreecommitdiff
path: root/drivers/md/bcache/super.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-07-22 19:02:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-22 19:02:52 -0700
commitd4c90b1b9fe907da0d310008e5a769b591a14399 (patch)
treed37589ab70ada2778d315a0ad24d6e68c8615af6 /drivers/md/bcache/super.c
parent3b2f64d00c46e1e4e9bd0bb9bb12619adac27a4b (diff)
parent0878ae2db83a10894724cdeaba7ef9f1ac1c9ac8 (diff)
Merge branch 'for-3.11/drivers' of git://git.kernel.dk/linux-block
Pull block IO driver bits from Jens Axboe: "As I mentioned in the core block pull request, due to real life circumstances the driver pull request would be late. Now it looks like -rc2 late... On the plus side, apart from the rsxx update, these are all things that I could argue could go in later in the cycle as they are fixes and not features. So even though things are late, it's not ALL bad. The pull request contains: - Updates to bcache, all bug fixes, from Kent. - A pile of drbd bug fixes (no big features this time!). - xen blk front/back fixes. - rsxx driver updates, some of them deferred from 3.10. So should be well cooked by now" * 'for-3.11/drivers' of git://git.kernel.dk/linux-block: (63 commits) bcache: Allocation kthread fixes bcache: Fix GC_SECTORS_USED() calculation bcache: Journal replay fix bcache: Shutdown fix bcache: Fix a sysfs splat on shutdown bcache: Advertise that flushes are supported bcache: check for allocation failures bcache: Fix a dumb race bcache: Use standard utility code bcache: Update email address bcache: Delete fuzz tester bcache: Document shrinker reserve better bcache: FUA fixes drbd: Allow online change of al-stripes and al-stripe-size drbd: Constants should be UPPERCASE drbd: Ignore the exit code of a fence-peer handler if it returns too late drbd: Fix rcu_read_lock balance on error path drbd: fix error return code in drbd_init() drbd: Do not sleep inside rcu bcache: Refresh usage docs ...
Diffstat (limited to 'drivers/md/bcache/super.c')
-rw-r--r--drivers/md/bcache/super.c171
1 files changed, 129 insertions, 42 deletions
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f88e2b653a3f..547c4c57b052 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -10,10 +10,13 @@
#include "btree.h"
#include "debug.h"
#include "request.h"
+#include "writeback.h"
+#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
+#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/reboot.h>
@@ -342,6 +345,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
struct closure *cl = &c->uuid_write.cl;
struct uuid_entry *u;
unsigned i;
+ char buf[80];
BUG_ON(!parent);
closure_lock(&c->uuid_write, parent);
@@ -362,8 +366,8 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
break;
}
- pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read",
- pkey(&c->uuid_bucket));
+ bch_bkey_to_text(buf, sizeof(buf), k);
+ pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf);
for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
if (!bch_is_zero(u->uuid, 16))
@@ -543,7 +547,6 @@ void bch_prio_write(struct cache *ca)
pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
fifo_used(&ca->free_inc), fifo_used(&ca->unused));
- blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
for (i = prio_buckets(ca) - 1; i >= 0; --i) {
long bucket;
@@ -704,7 +707,8 @@ static void bcache_device_detach(struct bcache_device *d)
atomic_set(&d->detaching, 0);
}
- bcache_device_unlink(d);
+ if (!d->flush_done)
+ bcache_device_unlink(d);
d->c->devices[d->id] = NULL;
closure_put(&d->c->caching);
@@ -743,13 +747,35 @@ static void bcache_device_free(struct bcache_device *d)
mempool_destroy(d->unaligned_bvec);
if (d->bio_split)
bioset_free(d->bio_split);
+ if (is_vmalloc_addr(d->stripe_sectors_dirty))
+ vfree(d->stripe_sectors_dirty);
+ else
+ kfree(d->stripe_sectors_dirty);
closure_debug_destroy(&d->cl);
}
-static int bcache_device_init(struct bcache_device *d, unsigned block_size)
+static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+ sector_t sectors)
{
struct request_queue *q;
+ size_t n;
+
+ if (!d->stripe_size_bits)
+ d->stripe_size_bits = 31;
+
+ d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >>
+ d->stripe_size_bits;
+
+ if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t))
+ return -ENOMEM;
+
+ n = d->nr_stripes * sizeof(atomic_t);
+ d->stripe_sectors_dirty = n < PAGE_SIZE << 6
+ ? kzalloc(n, GFP_KERNEL)
+ : vzalloc(n);
+ if (!d->stripe_sectors_dirty)
+ return -ENOMEM;
if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
@@ -759,6 +785,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
!(q = blk_alloc_queue(GFP_KERNEL)))
return -ENOMEM;
+ set_capacity(d->disk, sectors);
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
d->disk->major = bcache_major;
@@ -781,6 +808,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags);
set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags);
+ blk_queue_flush(q, REQ_FLUSH|REQ_FUA);
+
return 0;
}
@@ -800,6 +829,17 @@ static void calc_cached_dev_sectors(struct cache_set *c)
void bch_cached_dev_run(struct cached_dev *dc)
{
struct bcache_device *d = &dc->disk;
+ char buf[SB_LABEL_SIZE + 1];
+ char *env[] = {
+ "DRIVER=bcache",
+ kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
+ NULL,
+ NULL,
+ };
+
+ memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
+ buf[SB_LABEL_SIZE] = '\0';
+ env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
if (atomic_xchg(&dc->running, 1))
return;
@@ -816,10 +856,12 @@ void bch_cached_dev_run(struct cached_dev *dc)
add_disk(d->disk);
bd_link_disk_holder(dc->bdev, dc->disk.disk);
-#if 0
- char *env[] = { "SYMLINK=label" , NULL };
+ /* won't show up in the uevent file, use udevadm monitor -e instead
+ * only class / kset properties are persistent */
kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
-#endif
+ kfree(env[1]);
+ kfree(env[2]);
+
if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
pr_debug("error creating sysfs link");
@@ -960,6 +1002,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
atomic_set(&dc->count, 1);
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
+ bch_sectors_dirty_init(dc);
atomic_set(&dc->has_dirty, 1);
atomic_inc(&dc->count);
bch_writeback_queue(dc);
@@ -1014,6 +1057,14 @@ static void cached_dev_flush(struct closure *cl)
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
struct bcache_device *d = &dc->disk;
+ mutex_lock(&bch_register_lock);
+ d->flush_done = 1;
+
+ if (d->c)
+ bcache_device_unlink(d);
+
+ mutex_unlock(&bch_register_lock);
+
bch_cache_accounting_destroy(&dc->accounting);
kobject_del(&d->kobj);
@@ -1045,7 +1096,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
}
- ret = bcache_device_init(&dc->disk, block_size);
+ ret = bcache_device_init(&dc->disk, block_size,
+ dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
if (ret)
return ret;
@@ -1144,11 +1196,10 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
kobject_init(&d->kobj, &bch_flash_dev_ktype);
- if (bcache_device_init(d, block_bytes(c)))
+ if (bcache_device_init(d, block_bytes(c), u->sectors))
goto err;
bcache_device_attach(d, c, u - c->uuids);
- set_capacity(d->disk, u->sectors);
bch_flash_dev_request_init(d);
add_disk(d->disk);
@@ -1255,9 +1306,10 @@ static void cache_set_free(struct closure *cl)
free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
- kfree(c->fill_iter);
if (c->bio_split)
bioset_free(c->bio_split);
+ if (c->fill_iter)
+ mempool_destroy(c->fill_iter);
if (c->bio_meta)
mempool_destroy(c->bio_meta);
if (c->search)
@@ -1278,11 +1330,9 @@ static void cache_set_free(struct closure *cl)
static void cache_set_flush(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, caching);
+ struct cache *ca;
struct btree *b;
-
- /* Shut down allocator threads */
- set_bit(CACHE_SET_STOPPING_2, &c->flags);
- wake_up(&c->alloc_wait);
+ unsigned i;
bch_cache_accounting_destroy(&c->accounting);
@@ -1295,7 +1345,11 @@ static void cache_set_flush(struct closure *cl)
/* Should skip this if we're unregistering because of an error */
list_for_each_entry(b, &c->btree_cache, list)
if (btree_node_dirty(b))
- bch_btree_write(b, true, NULL);
+ bch_btree_node_write(b, NULL);
+
+ for_each_cache(ca, c, i)
+ if (ca->alloc_thread)
+ kthread_stop(ca->alloc_thread);
closure_return(cl);
}
@@ -1303,18 +1357,22 @@ static void cache_set_flush(struct closure *cl)
static void __cache_set_unregister(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, caching);
- struct cached_dev *dc, *t;
+ struct cached_dev *dc;
size_t i;
mutex_lock(&bch_register_lock);
- if (test_bit(CACHE_SET_UNREGISTERING, &c->flags))
- list_for_each_entry_safe(dc, t, &c->cached_devs, list)
- bch_cached_dev_detach(dc);
-
for (i = 0; i < c->nr_uuids; i++)
- if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i]))
- bcache_device_stop(c->devices[i]);
+ if (c->devices[i]) {
+ if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
+ test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+ dc = container_of(c->devices[i],
+ struct cached_dev, disk);
+ bch_cached_dev_detach(dc);
+ } else {
+ bcache_device_stop(c->devices[i]);
+ }
+ }
mutex_unlock(&bch_register_lock);
@@ -1373,9 +1431,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->btree_pages = max_t(int, c->btree_pages / 4,
BTREE_MAX_PAGES);
- init_waitqueue_head(&c->alloc_wait);
+ c->sort_crit_factor = int_sqrt(c->btree_pages);
+
mutex_init(&c->bucket_lock);
- mutex_init(&c->fill_lock);
mutex_init(&c->sort_lock);
spin_lock_init(&c->sort_time_lock);
closure_init_unlocked(&c->sb_write);
@@ -1401,8 +1459,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
!(c->bio_meta = mempool_create_kmalloc_pool(2,
sizeof(struct bbio) + sizeof(struct bio_vec) *
bucket_pages(c))) ||
+ !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
!(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
- !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) ||
!(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
bch_journal_alloc(c) ||
@@ -1410,8 +1468,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
bch_open_buckets_alloc(c))
goto err;
- c->fill_iter->size = sb->bucket_size / sb->block_size;
-
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = 8 << IO_ERROR_SHIFT;
@@ -1496,9 +1552,10 @@ static void run_cache_set(struct cache_set *c)
*/
bch_journal_next(&c->journal);
+ err = "error starting allocator thread";
for_each_cache(ca, c, i)
- closure_call(&ca->alloc, bch_allocator_thread,
- system_wq, &c->cl);
+ if (bch_cache_allocator_start(ca))
+ goto err;
/*
* First place it's safe to allocate: btree_check() and
@@ -1531,17 +1588,16 @@ static void run_cache_set(struct cache_set *c)
bch_btree_gc_finish(c);
+ err = "error starting allocator thread";
for_each_cache(ca, c, i)
- closure_call(&ca->alloc, bch_allocator_thread,
- ca->alloc_workqueue, &c->cl);
+ if (bch_cache_allocator_start(ca))
+ goto err;
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i)
bch_prio_write(ca);
mutex_unlock(&c->bucket_lock);
- wake_up(&c->alloc_wait);
-
err = "cannot allocate new UUID bucket";
if (__uuid_write(c))
goto err_unlock_gc;
@@ -1552,7 +1608,7 @@ static void run_cache_set(struct cache_set *c)
goto err_unlock_gc;
bkey_copy_key(&c->root->key, &MAX_KEY);
- bch_btree_write(c->root, true, &op);
+ bch_btree_node_write(c->root, &op.cl);
bch_btree_set_root(c->root);
rw_unlock(true, c->root);
@@ -1673,9 +1729,6 @@ void bch_cache_release(struct kobject *kobj)
bio_split_pool_free(&ca->bio_split_hook);
- if (ca->alloc_workqueue)
- destroy_workqueue(ca->alloc_workqueue);
-
free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
kfree(ca->prio_buckets);
vfree(ca->buckets);
@@ -1723,7 +1776,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
!(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
2, GFP_KERNEL)) ||
!(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
- !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) ||
bio_split_pool_init(&ca->bio_split_hook))
return -ENOMEM;
@@ -1786,6 +1838,36 @@ static ssize_t register_bcache(struct kobject *, struct kobj_attribute *,
kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
+static bool bch_is_open_backing(struct block_device *bdev) { /* true if some known cached_dev has dc->bdev == bdev */
+ struct cache_set *c, *tc;
+ struct cached_dev *dc, *t;
+
+ list_for_each_entry_safe(c, tc, &bch_cache_sets, list) /* first: cached_devs list of every cache set */
+ list_for_each_entry_safe(dc, t, &c->cached_devs, list)
+ if (dc->bdev == bdev)
+ return true;
+ list_for_each_entry_safe(dc, t, &uncached_devices, list) /* then: devices on the uncached_devices list */
+ if (dc->bdev == bdev)
+ return true;
+ return false; /* NOTE(review): no lock is taken in this function - confirm callers serialize access to these lists */
+}
+
+static bool bch_is_open_cache(struct block_device *bdev) { /* true if some cache in a known cache set has ca->bdev == bdev */
+ struct cache_set *c, *tc;
+ struct cache *ca;
+ unsigned i;
+
+ list_for_each_entry_safe(c, tc, &bch_cache_sets, list) /* every cache set on bch_cache_sets */
+ for_each_cache(ca, c, i) /* every cache belonging to that set */
+ if (ca->bdev == bdev)
+ return true;
+ return false; /* NOTE(review): no lock is taken in this function - confirm callers serialize access to bch_cache_sets */
+}
+
+static bool bch_is_open(struct block_device *bdev) { /* true if bdev matches either a cache or a backing device */
+ return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); /* cache check runs first; backing lists only scanned if it fails */
+}
+
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size)
{
@@ -1810,8 +1892,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
FMODE_READ|FMODE_WRITE|FMODE_EXCL,
sb);
if (IS_ERR(bdev)) {
- if (bdev == ERR_PTR(-EBUSY))
- err = "device busy";
+ if (bdev == ERR_PTR(-EBUSY)) {
+ bdev = lookup_bdev(strim(path));
+ if (!IS_ERR(bdev) && bch_is_open(bdev))
+ err = "device already registered";
+ else
+ err = "device busy";
+ }
goto err;
}