diff options
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 31 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 113 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 60 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_proc.c | 10 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 60 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 86 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 6 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 48 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.h | 8 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 48 |
12 files changed, 335 insertions, 139 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e02c45cd3c5a..5f0eaee8c8a7 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -151,7 +151,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, op_flags |= REQ_SYNC; bio = bio_alloc_drbd(GFP_NOIO); - bio->bi_bdev = bdev->md_bdev; + bio_set_dev(bio, bdev->md_bdev); bio->bi_iter.bi_sector = sector; err = -EIO; if (bio_add_page(bio, device->md_io.page, size, 0) != size) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 809fd245c3dc..bd97908c766f 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1019,7 +1019,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho bm_store_page_idx(page, page_nr); } else page = b->bm_pages[page_nr]; - bio->bi_bdev = device->ldev->md_bdev; + bio_set_dev(bio, device->ldev->md_bdev); bio->bi_iter.bi_sector = on_disk_sector; /* bio_add_page of a single page to an empty bio will always succeed, * according to api. Do we want to assert that? */ diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d17b6e6393c7..7e8589ce631c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -63,19 +63,15 @@ # define __must_hold(x) #endif -/* module parameter, defined in drbd_main.c */ -extern unsigned int minor_count; -extern bool disable_sendpage; -extern bool allow_oos; -void tl_abort_disk_io(struct drbd_device *device); - +/* shared module parameters, defined in drbd_main.c */ #ifdef CONFIG_DRBD_FAULT_INJECTION -extern int enable_faults; -extern int fault_rate; -extern int fault_devs; +extern int drbd_enable_faults; +extern int drbd_fault_rate; #endif -extern char usermode_helper[]; +extern unsigned int drbd_minor_count; +extern char drbd_usermode_helper[]; +extern int drbd_proc_details; /* This is used to stop/restart our threads. @@ -181,8 +177,8 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type); static inline int drbd_insert_fault(struct drbd_device *device, unsigned int type) { #ifdef CONFIG_DRBD_FAULT_INJECTION - return fault_rate && - (enable_faults & (1<<type)) && + return drbd_fault_rate && + (drbd_enable_faults & (1<<type)) && _drbd_insert_fault(device, type); #else return 0; @@ -745,6 +741,8 @@ struct drbd_connection { unsigned current_tle_writes; /* writes seen within this tl epoch */ unsigned long last_reconnect_jif; + /* empty member on older kernels without blk_start_plug() */ + struct blk_plug receiver_plug; struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread ack_receiver; @@ -1131,7 +1129,8 @@ extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_sta extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *); extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev); extern void drbd_device_cleanup(struct drbd_device *device); -void drbd_print_uuids(struct drbd_device *device, const char *text); +extern void drbd_print_uuids(struct drbd_device *device, const char *text); +extern void drbd_queue_unplug(struct drbd_device *device); extern void conn_md_sync(struct drbd_connection *connection); extern void drbd_md_write(struct drbd_device *device, void *buffer); @@ -1463,8 +1462,6 @@ extern struct drbd_resource *drbd_find_resource(const char *name); extern void drbd_destroy_resource(struct kref *kref); extern void conn_free_crypto(struct drbd_connection *connection); -extern int proc_details; - /* drbd_req */ extern void do_submit(struct work_struct *ws); extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long); @@ -1628,8 +1625,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device, int fault_type, struct bio *bio) { __release(local); - if (!bio->bi_bdev) { - drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); + if (!bio->bi_disk) { + drbd_err(device, "drbd_generic_make_request: bio->bi_disk == NULL\n"); bio->bi_status = BLK_STS_IOERR; bio_endio(bio); return; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e2ed28d45ce1..8cb3791898ae 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -77,41 +77,41 @@ MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices (" MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); #include <linux/moduleparam.h> -/* allow_open_on_secondary */ -MODULE_PARM_DESC(allow_oos, "DONT USE!"); /* thanks to these macros, if compiled into the kernel (not-module), - * this becomes the boot parameter drbd.minor_count */ -module_param(minor_count, uint, 0444); -module_param(disable_sendpage, bool, 0644); -module_param(allow_oos, bool, 0); -module_param(proc_details, int, 0644); + * these become boot parameters (e.g., drbd.minor_count) */ #ifdef CONFIG_DRBD_FAULT_INJECTION -int enable_faults; -int fault_rate; -static int fault_count; -int fault_devs; +int drbd_enable_faults; +int drbd_fault_rate; +static int drbd_fault_count; +static int drbd_fault_devs; /* bitmap of enabled faults */ -module_param(enable_faults, int, 0664); +module_param_named(enable_faults, drbd_enable_faults, int, 0664); /* fault rate % value - applies to all enabled faults */ -module_param(fault_rate, int, 0664); +module_param_named(fault_rate, drbd_fault_rate, int, 0664); /* count of faults inserted */ -module_param(fault_count, int, 0664); +module_param_named(fault_count, drbd_fault_count, int, 0664); /* bitmap of devices to insert faults on */ -module_param(fault_devs, int, 0644); +module_param_named(fault_devs, drbd_fault_devs, int, 0644); #endif -/* module parameter, defined */ -unsigned int minor_count = DRBD_MINOR_COUNT_DEF; -bool disable_sendpage; -bool allow_oos; -int proc_details; /* Detail level in proc drbd*/ - +/* module parameters we can keep static */ +static bool drbd_allow_oos; /* allow_open_on_secondary */ +static bool drbd_disable_sendpage; +MODULE_PARM_DESC(allow_oos, "DONT USE!"); +module_param_named(allow_oos, drbd_allow_oos, bool, 0); +module_param_named(disable_sendpage, drbd_disable_sendpage, bool, 0644); + +/* module parameters we share */ +int drbd_proc_details; /* Detail level in proc drbd*/ +module_param_named(proc_details, drbd_proc_details, int, 0644); +/* module parameters shared with defaults */ +unsigned int drbd_minor_count = DRBD_MINOR_COUNT_DEF; /* Module parameter for setting the user mode helper program * to run. Default is /sbin/drbdadm */ -char usermode_helper[80] = "/sbin/drbdadm"; - -module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644); +char drbd_usermode_helper[80] = "/sbin/drbdadm"; +module_param_named(minor_count, drbd_minor_count, uint, 0444); +module_param_string(usermode_helper, drbd_usermode_helper, sizeof(drbd_usermode_helper), 0644); /* in 2.6.x, our device mapping and config info contains our virtual gendisks * as member "struct gendisk *vdisk;" @@ -923,7 +923,9 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device) } /* communicated if (agreed_features & DRBD_FF_WSAME) */ -void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q) +static void +assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, + struct request_queue *q) { if (q) { p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q)); @@ -1560,7 +1562,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa * put_page(); and would cause either a VM_BUG directly, or * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. */ - if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) + if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page)) return _drbd_no_send_page(peer_device, page, offset, size, msg_flags); msg_flags |= MSG_NOSIGNAL; @@ -1932,7 +1934,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) if (device->state.role != R_PRIMARY) { if (mode & FMODE_WRITE) rv = -EROFS; - else if (!allow_oos) + else if (!drbd_allow_oos) rv = -EMEDIUMTYPE; } @@ -1952,6 +1954,19 @@ static void drbd_release(struct gendisk *gd, fmode_t mode) mutex_unlock(&drbd_main_mutex); } +/* need to hold resource->req_lock */ +void drbd_queue_unplug(struct drbd_device *device) +{ + if (device->state.pdsk >= D_INCONSISTENT && device->state.conn >= C_CONNECTED) { + D_ASSERT(device, device->state.role == R_PRIMARY); + if (test_and_clear_bit(UNPLUG_REMOTE, &device->flags)) { + drbd_queue_work_if_unqueued( + &first_peer_device(device)->connection->sender_work, + &device->unplug_work); + } + } +} + static void drbd_set_defaults(struct drbd_device *device) { /* Beware! The actual layout differs @@ -2008,18 +2023,14 @@ void drbd_init_set_defaults(struct drbd_device *device) device->unplug_work.cb = w_send_write_hint; device->bm_io_work.w.cb = w_bitmap_io; - init_timer(&device->resync_timer); - init_timer(&device->md_sync_timer); - init_timer(&device->start_resync_timer); - init_timer(&device->request_timer); - device->resync_timer.function = resync_timer_fn; - device->resync_timer.data = (unsigned long) device; - device->md_sync_timer.function = md_sync_timer_fn; - device->md_sync_timer.data = (unsigned long) device; - device->start_resync_timer.function = start_resync_timer_fn; - device->start_resync_timer.data = (unsigned long) device; - device->request_timer.function = request_timer_fn; - device->request_timer.data = (unsigned long) device; + setup_timer(&device->resync_timer, resync_timer_fn, + (unsigned long)device); + setup_timer(&device->md_sync_timer, md_sync_timer_fn, + (unsigned long)device); + setup_timer(&device->start_resync_timer, start_resync_timer_fn, + (unsigned long)device); + setup_timer(&device->request_timer, request_timer_fn, + (unsigned long)device); init_waitqueue_head(&device->misc_wait); init_waitqueue_head(&device->state_wait); @@ -2131,7 +2142,7 @@ static void drbd_destroy_mempools(void) static int drbd_create_mempools(void) { struct page *page; - const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count; + const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count; int i; /* prepare our caches and mempools */ @@ -2167,13 +2178,12 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ - drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER); + drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0); if (drbd_io_bio_set == NULL) goto Enomem; drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0, - BIOSET_NEED_BVECS | - BIOSET_NEED_RESCUER); + BIOSET_NEED_BVECS); if (drbd_md_io_bio_set == NULL) goto Enomem; @@ -2409,7 +2419,6 @@ static void drbd_cleanup(void) destroy_workqueue(retry.wq); drbd_genl_unregister(); - drbd_debugfs_cleanup(); idr_for_each_entry(&drbd_devices, device, i) drbd_delete_device(device); @@ -2420,6 +2429,8 @@ static void drbd_cleanup(void) drbd_free_resource(resource); } + drbd_debugfs_cleanup(); + drbd_destroy_mempools(); unregister_blkdev(DRBD_MAJOR, "drbd"); @@ -2972,12 +2983,12 @@ static int __init drbd_init(void) { int err; - if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { - pr_err("invalid minor_count (%d)\n", minor_count); + if (drbd_minor_count < DRBD_MINOR_COUNT_MIN || drbd_minor_count > DRBD_MINOR_COUNT_MAX) { + pr_err("invalid minor_count (%d)\n", drbd_minor_count); #ifdef MODULE return -EINVAL; #else - minor_count = DRBD_MINOR_COUNT_DEF; + drbd_minor_count = DRBD_MINOR_COUNT_DEF; #endif } @@ -3900,12 +3911,12 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type) static struct fault_random_state rrs = {0, 0}; unsigned int ret = ( - (fault_devs == 0 || - ((1 << device_to_minor(device)) & fault_devs) != 0) && - (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate)); + (drbd_fault_devs == 0 || + ((1 << device_to_minor(device)) & drbd_fault_devs) != 0) && + (((_drbd_fault_random(&rrs) % 100) + 1) <= drbd_fault_rate)); if (ret) { - fault_count++; + drbd_fault_count++; if (__ratelimit(&drbd_ratelimit_state)) drbd_warn(device, "***Simulating %s failure\n", diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ad0fcb43e45c..a12f77e6891e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -344,7 +344,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd) (char[60]) { }, /* address */ NULL }; char mb[14]; - char *argv[] = {usermode_helper, cmd, mb, NULL }; + char *argv[] = {drbd_usermode_helper, cmd, mb, NULL }; struct drbd_connection *connection = first_peer_device(device)->connection; struct sib_info sib; int ret; @@ -359,19 +359,19 @@ int drbd_khelper(struct drbd_device *device, char *cmd) * write out any unsynced meta data changes now */ drbd_md_sync(device); - drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb); + drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb); sib.sib_reason = SIB_HELPER_PRE; sib.helper_name = cmd; drbd_bcast_event(device, &sib); notify_helper(NOTIFY_CALL, device, connection, cmd, 0); - ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); + ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC); if (ret) drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, mb, + drbd_usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); else drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, mb, + drbd_usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); sib.sib_reason = SIB_HELPER_POST; sib.helper_exit_code = ret; @@ -396,24 +396,24 @@ enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd) (char[60]) { }, /* address */ NULL }; char *resource_name = connection->resource->name; - char *argv[] = {usermode_helper, cmd, resource_name, NULL }; + char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL }; int ret; setup_khelper_env(connection, envp); conn_md_sync(connection); - drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name); + drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name); /* TODO: conn_bcast_event() ?? */ notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0); - ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); + ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC); if (ret) drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, resource_name, + drbd_usermode_helper, cmd, resource_name, (ret >> 8) & 0xff, ret); else drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n", - usermode_helper, cmd, resource_name, + drbd_usermode_helper, cmd, resource_name, (ret >> 8) & 0xff, ret); /* TODO: conn_bcast_event() ?? */ notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret); @@ -1236,12 +1236,18 @@ static void fixup_discard_if_not_supported(struct request_queue *q) static void decide_on_write_same_support(struct drbd_device *device, struct request_queue *q, - struct request_queue *b, struct o_qlim *o) + struct request_queue *b, struct o_qlim *o, + bool disable_write_same) { struct drbd_peer_device *peer_device = first_peer_device(device); struct drbd_connection *connection = peer_device->connection; bool can_do = b ? b->limits.max_write_same_sectors : true; + if (can_do && disable_write_same) { + can_do = false; + drbd_info(peer_device, "WRITE_SAME disabled by config\n"); + } + if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) { can_do = false; drbd_info(peer_device, "peer does not support WRITE_SAME\n"); @@ -1302,6 +1308,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi struct request_queue *b = NULL; struct disk_conf *dc; bool discard_zeroes_if_aligned = true; + bool disable_write_same = false; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; @@ -1311,6 +1318,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi dc = rcu_dereference(device->ldev->disk_conf); max_segments = dc->max_bio_bvecs; discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned; + disable_write_same = dc->disable_write_same; rcu_read_unlock(); blk_set_stacking_limits(&q->limits); @@ -1321,7 +1329,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); blk_queue_segment_boundary(q, PAGE_SIZE-1); decide_on_discard_support(device, q, b, discard_zeroes_if_aligned); - decide_on_write_same_support(device, q, b, o); + decide_on_write_same_support(device, q, b, o, disable_write_same); if (b) { blk_queue_stack_limits(q, b); @@ -1612,7 +1620,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (write_ordering_changed(old_disk_conf, new_disk_conf)) drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); - if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned) + if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned + || old_disk_conf->disable_write_same != new_disk_conf->disable_write_same) drbd_reconsider_queue_parameters(device, device->ldev, NULL); drbd_md_sync(device); @@ -2140,34 +2149,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) static int adm_detach(struct drbd_device *device, int force) { - enum drbd_state_rv retcode; - void *buffer; - int ret; - if (force) { set_bit(FORCE_DETACH, &device->flags); drbd_force_state(device, NS(disk, D_FAILED)); - retcode = SS_SUCCESS; - goto out; + return SS_SUCCESS; } - drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ - buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */ - if (buffer) { - retcode = drbd_request_state(device, NS(disk, D_FAILED)); - drbd_md_put_buffer(device); - } else /* already <= D_FAILED */ - retcode = SS_NOTHING_TO_DO; - /* D_FAILED will transition to DISKLESS. */ - drbd_resume_io(device); - ret = wait_event_interruptible(device->misc_wait, - device->state.disk != D_FAILED); - if ((int)retcode == (int)SS_IS_DISKLESS) - retcode = SS_NOTHING_TO_DO; - if (ret) - retcode = ERR_INTR; -out: - return retcode; + return drbd_request_detach_interruptible(device); } /* Detaching the disk is a process in multiple stages. First we need to lock diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 8378142f7a55..582caeb0de86 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -127,7 +127,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se seq_putc(seq, '='); seq_putc(seq, '>'); for (i = 0; i < y; i++) - seq_printf(seq, "."); + seq_putc(seq, '.'); seq_puts(seq, "] "); if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) @@ -179,7 +179,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se seq_printf_with_thousands_grouping(seq, dbdt); seq_puts(seq, " ("); /* ------------------------- ~3s average ------------------------ */ - if (proc_details >= 1) { + if (drbd_proc_details >= 1) { /* this is what drbd_rs_should_slow_down() uses */ i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; dt = (jiffies - device->rs_mark_time[i]) / HZ; @@ -209,7 +209,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se } seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); - if (proc_details >= 1) { + if (drbd_proc_details >= 1) { /* 64 bit: * we convert to sectors in the display below. */ unsigned long bm_bits = drbd_bm_bits(device); @@ -332,13 +332,13 @@ static int drbd_seq_show(struct seq_file *seq, void *v) state.conn == C_VERIFY_T) drbd_syncer_progress(device, seq, state); - if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) { + if (drbd_proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) { lc_seq_printf_stats(seq, device->resync); lc_seq_printf_stats(seq, device->act_log); put_ldev(device); } - if (proc_details >= 2) + if (drbd_proc_details >= 2) seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt)); } rcu_read_unlock(); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c7e95e6380fb..796eaf347dc0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -332,7 +332,7 @@ static void drbd_free_pages(struct drbd_device *device, struct page *page, int i if (page == NULL) return; - if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count) + if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count) i = page_chain_free(page); else { struct page *tmp; @@ -1100,7 +1100,10 @@ randomize: idr_for_each_entry(&connection->peer_devices, peer_device, vnr) mutex_lock(peer_device->device->state_mutex); + /* avoid a race with conn_request_state( C_DISCONNECTING ) */ + spin_lock_irq(&connection->resource->req_lock); set_bit(STATE_SENT, &connection->flags); + spin_unlock_irq(&connection->resource->req_lock); idr_for_each_entry(&connection->peer_devices, peer_device, vnr) mutex_unlock(peer_device->device->state_mutex); @@ -1194,6 +1197,14 @@ static int decode_header(struct drbd_connection *connection, void *header, struc return 0; } +static void drbd_unplug_all_devices(struct drbd_connection *connection) +{ + if (current->plug == &connection->receiver_plug) { + blk_finish_plug(&connection->receiver_plug); + blk_start_plug(&connection->receiver_plug); + } /* else: maybe just schedule() ?? */ +} + static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi) { void *buffer = connection->data.rbuf; @@ -1209,6 +1220,36 @@ static int drbd_recv_header(struct drbd_connection *connection, struct packet_in return err; } +static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi) +{ + void *buffer = connection->data.rbuf; + unsigned int size = drbd_header_size(connection); + int err; + + err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT); + if (err != size) { + /* If we have nothing in the receive buffer now, to reduce + * application latency, try to drain the backend queues as + * quickly as possible, and let remote TCP know what we have + * received so far. */ + if (err == -EAGAIN) { + drbd_tcp_quickack(connection->data.socket); + drbd_unplug_all_devices(connection); + } + if (err > 0) { + buffer += err; + size -= err; + } + err = drbd_recv_all_warn(connection, buffer, size); + if (err) + return err; + } + + err = decode_header(connection, connection->data.rbuf, pi); + connection->last_received = jiffies; + + return err; +} /* This is blkdev_issue_flush, but asynchronous. * We want to submit to all component volumes in parallel, * then wait for all completions. @@ -1223,7 +1264,7 @@ struct one_flush_context { struct issue_flush_context *ctx; }; -void one_flush_endio(struct bio *bio) +static void one_flush_endio(struct bio *bio) { struct one_flush_context *octx = bio->bi_private; struct drbd_device *device = octx->device; @@ -1265,7 +1306,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont octx->device = device; octx->ctx = ctx; - bio->bi_bdev = device->ldev->backing_bdev; + bio_set_dev(bio, device->ldev->backing_bdev); bio->bi_private = octx; bio->bi_end_io = one_flush_endio; bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; @@ -1548,7 +1589,7 @@ next_bio: } /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; - bio->bi_bdev = device->ldev->backing_bdev; + bio_set_dev(bio, device->ldev->backing_bdev); bio_set_op_attrs(bio, op, op_flags); bio->bi_private = peer_req; bio->bi_end_io = drbd_peer_request_endio; @@ -4085,7 +4126,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info return config_unknown_volume(connection, pi); device = peer_device->device; - p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); + p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO); if (!p_uuid) { drbd_err(device, "kmalloc of p_uuid failed\n"); return false; @@ -4882,8 +4923,8 @@ static void drbdd(struct drbd_connection *connection) struct data_cmd const *cmd; drbd_thread_current_set_cpu(&connection->receiver); - update_receiver_timing_details(connection, drbd_recv_header); - if (drbd_recv_header(connection, &pi)) + update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug); + if (drbd_recv_header_maybe_unplug(connection, &pi)) goto err_out; cmd = &drbd_cmd_handler[pi.cmd]; @@ -5375,8 +5416,11 @@ int drbd_receiver(struct drbd_thread *thi) } } while (h == 0); - if (h > 0) + if (h > 0) { + blk_start_plug(&connection->receiver_plug); drbdd(connection); + blk_finish_plug(&connection->receiver_plug); + } conn_disconnect(connection); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index f6e865b2d543..de8566e55334 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -36,14 +36,18 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, /* Update disk stats at start of I/O request */ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req) { - generic_start_io_acct(bio_data_dir(req->master_bio), req->i.size >> 9, - &device->vdisk->part0); + struct request_queue *q = device->rq_queue; + + generic_start_io_acct(q, bio_data_dir(req->master_bio), + req->i.size >> 9, &device->vdisk->part0); } /* Update disk stats when completing request upwards */ static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) { - generic_end_io_acct(bio_data_dir(req->master_bio), + struct request_queue *q = device->rq_queue; + + generic_end_io_acct(q, bio_data_dir(req->master_bio), &device->vdisk->part0, req->start_jif); } @@ -1175,7 +1179,7 @@ drbd_submit_req_private_bio(struct drbd_request *req) else type = DRBD_FAULT_DT_RD; - bio->bi_bdev = device->ldev->backing_bdev; + bio_set_dev(bio, device->ldev->backing_bdev); /* State may have changed since we grabbed our reference on the * ->ldev member. Double check, and short-circuit to endio. @@ -1275,6 +1279,57 @@ static bool may_do_writes(struct drbd_device *device) return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE; } +struct drbd_plug_cb { + struct blk_plug_cb cb; + struct drbd_request *most_recent_req; + /* do we need more? */ +}; + +static void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule) +{ + struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb); + struct drbd_resource *resource = plug->cb.data; + struct drbd_request *req = plug->most_recent_req; + + kfree(cb); + if (!req) + return; + + spin_lock_irq(&resource->req_lock); + /* In case the sender did not process it yet, raise the flag to + * have it followed with P_UNPLUG_REMOTE just after. */ + req->rq_state |= RQ_UNPLUG; + /* but also queue a generic unplug */ + drbd_queue_unplug(req->device); + kref_put(&req->kref, drbd_req_destroy); + spin_unlock_irq(&resource->req_lock); +} + +static struct drbd_plug_cb* drbd_check_plugged(struct drbd_resource *resource) +{ + /* A lot of text to say + * return (struct drbd_plug_cb*)blk_check_plugged(); */ + struct drbd_plug_cb *plug; + struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug)); + + if (cb) + plug = container_of(cb, struct drbd_plug_cb, cb); + else + plug = NULL; + return plug; +} + +static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req) +{ + struct drbd_request *tmp = plug->most_recent_req; + /* Will be sent to some peer. + * Remember to tag it with UNPLUG_REMOTE on unplug */ + kref_get(&req->kref); + plug->most_recent_req = req; + if (tmp) + kref_put(&tmp->kref, drbd_req_destroy); +} + static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) { struct drbd_resource *resource = device->resource; @@ -1347,6 +1402,12 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request no_remote = true; } + if (no_remote == false) { + struct drbd_plug_cb *plug = drbd_check_plugged(resource); + if (plug) + drbd_update_plug(plug, req); + } + /* If it took the fast path in drbd_request_prepare, add it here. * The slow path has added it already. */ if (list_empty(&req->req_pending_master_completion)) @@ -1395,7 +1456,10 @@ void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned l static void submit_fast_path(struct drbd_device *device, struct list_head *incoming) { + struct blk_plug plug; struct drbd_request *req, *tmp; + + blk_start_plug(&plug); list_for_each_entry_safe(req, tmp, incoming, tl_requests) { const int rw = bio_data_dir(req->master_bio); @@ -1413,6 +1477,7 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom list_del_init(&req->tl_requests); drbd_send_and_submit(device, req); } + blk_finish_plug(&plug); } static bool prepare_al_transaction_nonblock(struct drbd_device *device, @@ -1420,12 +1485,12 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device, struct list_head *pending, struct list_head *later) { - struct drbd_request *req, *tmp; + struct drbd_request *req; int wake = 0; int err; spin_lock_irq(&device->al_lock); - list_for_each_entry_safe(req, tmp, incoming, tl_requests) { + while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) { err = drbd_al_begin_io_nonblock(device, &req->i); if (err == -ENOBUFS) break; @@ -1442,17 +1507,20 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device, return !list_empty(pending); } -void send_and_submit_pending(struct drbd_device *device, struct list_head *pending) +static void send_and_submit_pending(struct drbd_device *device, struct list_head *pending) { - struct drbd_request *req, *tmp; + struct blk_plug plug; + struct drbd_request *req; - list_for_each_entry_safe(req, tmp, pending, tl_requests) { + blk_start_plug(&plug); + while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) { req->rq_state |= RQ_IN_ACT_LOG; req->in_actlog_jif = jiffies; atomic_dec(&device->ap_actlog_cnt); list_del_init(&req->tl_requests); drbd_send_and_submit(device, req); } + blk_finish_plug(&plug); } void do_submit(struct work_struct *ws) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 9e1866ab238f..a2254f825601 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -212,6 +212,11 @@ enum drbd_req_state_bits { /* Should call drbd_al_complete_io() for this request... */ __RQ_IN_ACT_LOG, + /* This was the most recent request during some blk_finish_plug() + * or its implicit from-schedule equivalent. + * We may use it as hint to send a P_UNPLUG_REMOTE */ + __RQ_UNPLUG, + /* The peer has sent a retry ACK */ __RQ_POSTPONED, @@ -249,6 +254,7 @@ enum drbd_req_state_bits { #define RQ_WSAME (1UL << __RQ_WSAME) #define RQ_UNMAP (1UL << __RQ_UNMAP) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) +#define RQ_UNPLUG (1UL << __RQ_UNPLUG) #define RQ_POSTPONED (1UL << __RQ_POSTPONED) #define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP) #define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index eea0c4aec978..0813c654c893 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -346,7 +346,7 @@ static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) enum drbd_role conn_highest_role(struct drbd_connection *connection) { - enum drbd_role role = R_UNKNOWN; + enum drbd_role role = R_SECONDARY; struct drbd_peer_device *peer_device; int vnr; @@ -579,11 +579,14 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask, unsigned long flags; union drbd_state os, ns; enum drbd_state_rv rv; + void *buffer = NULL; init_completion(&done); if (f & CS_SERIALIZE) mutex_lock(device->state_mutex); + if (f & CS_INHIBIT_MD_IO) + buffer = drbd_md_get_buffer(device, __func__); spin_lock_irqsave(&device->resource->req_lock, flags); os = drbd_read_state(device); @@ -636,6 +639,8 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask, } abort: + if (buffer) + drbd_md_put_buffer(device); if (f & CS_SERIALIZE) mutex_unlock(device->state_mutex); @@ -664,6 +669,47 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask, return rv; } +/* + * We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while + * there is IO in-flight: the transition into D_FAILED for detach purposes + * may get misinterpreted as actual IO error in a confused endio function. + * + * We wrap it all into wait_event(), to retry in case the drbd_req_state() + * returns SS_IN_TRANSIENT_STATE. + * + * To avoid potential deadlock with e.g. the receiver thread trying to grab + * drbd_md_get_buffer() while trying to get out of the "transient state", we + * need to grab and release the meta data buffer inside of that wait_event loop. + */ +static enum drbd_state_rv +request_detach(struct drbd_device *device) +{ + return drbd_req_state(device, NS(disk, D_FAILED), + CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO); +} + +enum drbd_state_rv +drbd_request_detach_interruptible(struct drbd_device *device) +{ + enum drbd_state_rv rv; + int ret; + + drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ + wait_event_interruptible(device->state_wait, + (rv = request_detach(device)) != SS_IN_TRANSIENT_STATE); + drbd_resume_io(device); + + ret = wait_event_interruptible(device->misc_wait, + device->state.disk != D_FAILED); + + if (rv == SS_IS_DISKLESS) + rv = SS_NOTHING_TO_DO; + if (ret) + rv = ERR_INTR; + + return rv; +} + enum drbd_state_rv _drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask, union drbd_state val, enum chg_state_flags f) diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 6c9d5d4a8a75..0276c98fbbdd 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -71,6 +71,10 @@ enum chg_state_flags { CS_DC_SUSP = 1 << 10, CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK, CS_IGN_OUTD_FAIL = 1 << 11, + + /* Make sure no meta data IO is in flight, by calling + * drbd_md_get_buffer(). Used for graceful detach. */ + CS_INHIBIT_MD_IO = 1 << 12, }; /* drbd_dev_state and drbd_state are different types. This is to stress the @@ -156,6 +160,10 @@ static inline int drbd_request_state(struct drbd_device *device, return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED); } +/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */ +enum drbd_state_rv +drbd_request_detach_interruptible(struct drbd_device *device); + enum drbd_role conn_highest_role(struct drbd_connection *connection); enum drbd_role conn_highest_peer(struct drbd_connection *connection); enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1d8726a8df34..03471b3fce86 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -65,6 +65,11 @@ void drbd_md_endio(struct bio *bio) device = bio->bi_private; device->md_io.error = blk_status_to_errno(bio->bi_status); + /* special case: drbd_md_read() during drbd_adm_attach() */ + if (device->ldev) + put_ldev(device); + bio_put(bio); + /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able * to timeout on the lower level device, and eventually detach from it. * If this io completion runs after that timeout expired, this @@ -79,9 +84,6 @@ void drbd_md_endio(struct bio *bio) drbd_md_put_buffer(device); device->md_io.done = 1; wake_up(&device->misc_wait); - bio_put(bio); - if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ - put_ldev(device); } /* reads on behalf of the partner, @@ -128,6 +130,14 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l block_id = peer_req->block_id; peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; + if (peer_req->flags & EE_WAS_ERROR) { + /* In protocol != C, we usually do not send write acks. + * In case of a write error, send the neg ack anyways. */ + if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags)) + inc_unacked(device); + drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); + } + spin_lock_irqsave(&device->resource->req_lock, flags); device->writ_cnt += peer_req->i.size >> 9; list_move_tail(&peer_req->w.list, &device->done_ee); @@ -195,7 +205,8 @@ void drbd_peer_request_endio(struct bio *bio) } } -void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device) +static void +drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device) { panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n", device->minor, device->resource->name, device->vnr); @@ -1382,18 +1393,22 @@ static int drbd_send_barrier(struct drbd_connection *connection) return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); } +static int pd_send_unplug_remote(struct drbd_peer_device *pd) +{ + struct drbd_socket *sock = &pd->connection->data; + if (!drbd_prepare_command(pd, sock)) + return -EIO; + return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0); +} + int w_send_write_hint(struct drbd_work *w, int cancel) { struct drbd_device *device = container_of(w, struct drbd_device, unplug_work); - struct drbd_socket *sock; if (cancel) return 0; - sock = &first_peer_device(device)->connection->data; - if (!drbd_prepare_command(first_peer_device(device), sock)) - return -EIO; - return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0); + return pd_send_unplug_remote(first_peer_device(device)); } static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch) @@ -1455,6 +1470,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) struct drbd_device *device = req->device; struct drbd_peer_device *const peer_device = first_peer_device(device); struct drbd_connection *connection = peer_device->connection; + bool do_send_unplug = req->rq_state & RQ_UNPLUG; int err; if (unlikely(cancel)) { @@ -1470,6 +1486,9 @@ int w_send_dblock(struct drbd_work *w, int cancel) err = drbd_send_dblock(peer_device, req); req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + if (do_send_unplug && !err) + pd_send_unplug_remote(peer_device); + return err; } @@ -1484,6 +1503,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) struct drbd_device *device = req->device; struct drbd_peer_device *const peer_device = first_peer_device(device); struct drbd_connection *connection = peer_device->connection; + bool do_send_unplug = req->rq_state & RQ_UNPLUG; int err; if (unlikely(cancel)) { @@ -1501,6 +1521,9 @@ int w_send_read_req(struct drbd_work *w, int cancel) req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + if (do_send_unplug && !err) + pd_send_unplug_remote(peer_device); + return err; } @@ -1513,7 +1536,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) drbd_al_begin_io(device, &req->i); drbd_req_make_private_bio(req, req->master_bio); - req->private_bio->bi_bdev = device->ldev->backing_bdev; + bio_set_dev(req->private_bio, device->ldev->backing_bdev); generic_make_request(req->private_bio); return 0; @@ -1733,6 +1756,11 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) return; } + if (!connection) { + drbd_err(device, "No connection to peer, aborting!\n"); + return; + } + if (!test_bit(B_RS_H_DONE, &device->flags)) { if (side == C_SYNC_TARGET) { /* Since application IO was locked out during C_WF_BITMAP_T and |