diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 11:43:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 11:43:44 -0700 |
commit | 1ddeeb2a058d7b2a58ed9e820396b4ceb715d529 (patch) | |
tree | 32a27b8eb1c538239b641292d77dc1a8cee8ee97 /drivers/block/drbd | |
parent | d2c84bdce25a678c1e1f116d65b58790bd241af0 (diff) | |
parent | 5205a4aa8fc9454853b705b69611c80e9c644283 (diff) |
Merge tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- MD pull requests via Song:
- Cleanup redundant checks (Yu Kuai)
- Remove deprecated headers (Marc Zyngier, Song Liu)
- Concurrency fixes (Li Lingfeng)
- Memory leak fix (Li Nan)
- Refactor raid1 read_balance (Yu Kuai, Paul Luse)
- Clean up and fix for md_ioctl (Li Nan)
- Other small fixes (Gui-Dong Han, Heming Zhao)
- MD atomic limits (Christoph)
- NVMe pull request via Keith:
- RDMA target enhancements (Max)
- Fabrics fixes (Max, Guixin, Hannes)
- Atomic queue_limits usage (Christoph)
- Const use for class_register (Ricardo)
- Identification error handling fixes (Shin'ichiro, Keith)
- Improvement and cleanup for cached request handling (Christoph)
- Moving towards atomic queue limits. Core changes and driver bits so
far (Christoph)
- Fix UAF issues in aoeblk (Chun-Yi)
- Zoned fix and cleanups (Damien)
- s390 dasd cleanups and fixes (Jan, Miroslav)
- Block issue timestamp caching (me)
- noio scope guarding for zoned IO (Johannes)
- block/nvme PI improvements (Kanchan)
- Ability to terminate long running discard loop (Keith)
- bdev revalidation fix (Li)
- Get rid of old nr_queues hack for kdump kernels (Ming)
- Support for async deletion of ublk (Ming)
- Improve IRQ bio recycling (Pavel)
- Factor in CPU capacity for remote vs local completion (Qais)
- Add shared_tags configfs entry for null_blk (Shin'ichiro
- Fix for a regression in page refcounts introduced by the folio
unification (Tony)
- Misc fixes and cleanups (Arnd, Colin, John, Kunwu, Li, Navid,
Ricardo, Roman, Tang, Uwe)
* tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux: (221 commits)
block: partitions: only define function mac_fix_string for CONFIG_PPC_PMAC
block/swim: Convert to platform remove callback returning void
cdrom: gdrom: Convert to platform remove callback returning void
block: remove disk_stack_limits
md: remove mddev->queue
md: don't initialize queue limits
md/raid10: use the atomic queue limit update APIs
md/raid5: use the atomic queue limit update APIs
md/raid1: use the atomic queue limit update APIs
md/raid0: use the atomic queue limit update APIs
md: add queue limit helpers
md: add a mddev_is_dm helper
md: add a mddev_add_trace_msg helper
md: add a mddev_trace_remap helper
bcache: move calculation of stripe_size and io_opt into bcache_device_init
virtio_blk: Do not use disk_set_max_open/active_zones()
aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts
block: move capacity validation to blkpg_do_ioctl()
block: prevent division by zero in blk_rq_stat_sum()
drbd: atomically update queue limits in drbd_reconsider_queue_parameters
...
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 17 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 210 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 24 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state_change.h | 8 |
4 files changed, 131 insertions, 128 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6bc86106c7b2..113b441d4d36 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2690,6 +2690,14 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig int id; int vnr = adm_ctx->volume; enum drbd_ret_code err = ERR_NOMEM; + struct queue_limits lim = { + /* + * Setting the max_hw_sectors to an odd value of 8kibyte here. + * This triggers a max_bio_size message upon first attach or + * connect. + */ + .max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8, + }; device = minor_to_device(minor); if (device) @@ -2708,9 +2716,11 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + disk = blk_alloc_disk(&lim, NUMA_NO_NODE); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); goto out_no_disk; + } device->vdisk = disk; device->rq_queue = disk->queue; @@ -2727,9 +2737,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); blk_queue_write_cache(disk->queue, true, true); - /* Setting the max_hw_sectors to an odd value of 8kibyte here - This triggers a max_bio_size message upon first attach or connect */ - blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8); device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 6aed67278e8b..5d65c9754d83 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1189,9 +1189,31 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } -static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) +static unsigned int drbd_max_peer_bio_size(struct drbd_device *device) { - q->limits.discard_granularity = granularity; + /* + * We may ignore peer limits if the peer is modern enough. From 8.3.8 + * onwards the peer can use multiple BIOs for a single peer_request. + */ + if (device->state.conn < C_WF_REPORT_PARAMS) + return device->peer_max_bio_size; + + if (first_peer_device(device)->connection->agreed_pro_version < 94) + return min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + + /* + * Correct old drbd (up to 8.3.7) if it believes it can do more than + * 32KiB. + */ + if (first_peer_device(device)->connection->agreed_pro_version == 94) + return DRBD_MAX_SIZE_H80_PACKET; + + /* + * drbd 8.3.8 onwards, before 8.4.0 + */ + if (first_peer_device(device)->connection->agreed_pro_version < 100) + return DRBD_MAX_BIO_SIZE_P95; + return DRBD_MAX_BIO_SIZE; } static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) @@ -1204,149 +1226,119 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) return AL_EXTENT_SIZE >> 9; } -static void decide_on_discard_support(struct drbd_device *device, +static bool drbd_discard_supported(struct drbd_connection *connection, struct drbd_backing_dev *bdev) { - struct drbd_connection *connection = - first_peer_device(device)->connection; - struct request_queue *q = device->rq_queue; - unsigned int max_discard_sectors; - if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev)) - goto not_supported; + return false; if (connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); - goto not_supported; + return false; } - /* - * We don't care for the granularity, really. - * - * Stacking limits below should fix it for the local device. Whether or - * not it is a suitable granularity on the remote device is not our - * problem, really. If you care, you need to use devices with similar - * topology on all peers. - */ - blk_queue_discard_granularity(q, 512); - max_discard_sectors = drbd_max_discard_sectors(connection); - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - return; - -not_supported: - blk_queue_discard_granularity(q, 0); - blk_queue_max_discard_sectors(q, 0); + return true; } -static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q) +/* This is the workaround for "bio would need to, but cannot, be split" */ +static unsigned int drbd_backing_dev_max_segments(struct drbd_device *device) { - /* Fixup max_write_zeroes_sectors after blk_stack_limits(): - * if we can handle "zeroes" efficiently on the protocol, - * we want to do that, even if our backend does not announce - * max_write_zeroes_sectors itself. */ - struct drbd_connection *connection = first_peer_device(device)->connection; - /* If the peer announces WZEROES support, use it. Otherwise, rather - * send explicit zeroes than rely on some discard-zeroes-data magic. */ - if (connection->agreed_features & DRBD_FF_WZEROES) - q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; - else - q->limits.max_write_zeroes_sectors = 0; -} + unsigned int max_segments; -static void fixup_discard_support(struct drbd_device *device, struct request_queue *q) -{ - unsigned int max_discard = device->rq_queue->limits.max_discard_sectors; - unsigned int discard_granularity = - device->rq_queue->limits.discard_granularity >> SECTOR_SHIFT; + rcu_read_lock(); + max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; + rcu_read_unlock(); - if (discard_granularity > max_discard) { - blk_queue_discard_granularity(q, 0); - blk_queue_max_discard_sectors(q, 0); - } + if (!max_segments) + return BLK_MAX_SEGMENTS; + return max_segments; } -static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, - unsigned int max_bio_size, struct o_qlim *o) +void drbd_reconsider_queue_parameters(struct drbd_device *device, + struct drbd_backing_dev *bdev, struct o_qlim *o) { + struct drbd_connection *connection = + first_peer_device(device)->connection; struct request_queue * const q = device->rq_queue; - unsigned int max_hw_sectors = max_bio_size >> 9; - unsigned int max_segments = 0; + unsigned int now = queue_max_hw_sectors(q) << 9; + struct queue_limits lim; struct request_queue *b = NULL; - struct disk_conf *dc; + unsigned int new; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; - max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); - rcu_read_lock(); - dc = rcu_dereference(device->ldev->disk_conf); - max_segments = dc->max_bio_bvecs; - rcu_read_unlock(); - - blk_set_stacking_limits(&q->limits); + device->local_max_bio_size = + queue_max_hw_sectors(b) << SECTOR_SHIFT; } - blk_queue_max_hw_sectors(q, max_hw_sectors); - /* This is the workaround for "bio would need to, but cannot, be split" */ - blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); - blk_queue_segment_boundary(q, PAGE_SIZE-1); - decide_on_discard_support(device, bdev); - - if (b) { - blk_stack_limits(&q->limits, &b->limits, 0); - disk_update_readahead(device->vdisk); + /* + * We may later detach and re-attach on a disconnected Primary. Avoid + * decreasing the value in this case. + * + * We want to store what we know the peer DRBD can handle, not what the + * peer IO backend can handle. + */ + new = min3(DRBD_MAX_BIO_SIZE, device->local_max_bio_size, + max(drbd_max_peer_bio_size(device), device->peer_max_bio_size)); + if (new != now) { + if (device->state.role == R_PRIMARY && new < now) + drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", + new, now); + drbd_info(device, "max BIO size = %u\n", new); } - fixup_write_zeroes(device, q); - fixup_discard_support(device, q); -} - -void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) -{ - unsigned int now, new, local, peer; - - now = queue_max_hw_sectors(device->rq_queue) << 9; - local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ - peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ + lim = queue_limits_start_update(q); if (bdev) { - local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; - device->local_max_bio_size = local; + blk_set_stacking_limits(&lim); + lim.max_segments = drbd_backing_dev_max_segments(device); + } else { + lim.max_segments = BLK_MAX_SEGMENTS; } - local = min(local, DRBD_MAX_BIO_SIZE); - /* We may ignore peer limits if the peer is modern enough. - Because new from 8.3.8 onwards the peer can use multiple - BIOs for a single peer_request */ - if (device->state.conn >= C_WF_REPORT_PARAMS) { - if (first_peer_device(device)->connection->agreed_pro_version < 94) - peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); - /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ - else if (first_peer_device(device)->connection->agreed_pro_version == 94) - peer = DRBD_MAX_SIZE_H80_PACKET; - else if (first_peer_device(device)->connection->agreed_pro_version < 100) - peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ - else - peer = DRBD_MAX_BIO_SIZE; + lim.max_hw_sectors = new >> SECTOR_SHIFT; + lim.seg_boundary_mask = PAGE_SIZE - 1; - /* We may later detach and re-attach on a disconnected Primary. - * Avoid this setting to jump back in that case. - * We want to store what we know the peer DRBD can handle, - * not what the peer IO backend can handle. */ - if (peer > device->peer_max_bio_size) - device->peer_max_bio_size = peer; + /* + * We don't care for the granularity, really. + * + * Stacking limits below should fix it for the local device. Whether or + * not it is a suitable granularity on the remote device is not our + * problem, really. If you care, you need to use devices with similar + * topology on all peers. + */ + if (drbd_discard_supported(connection, bdev)) { + lim.discard_granularity = 512; + lim.max_hw_discard_sectors = + drbd_max_discard_sectors(connection); + } else { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; } - new = min(local, peer); - if (device->state.role == R_PRIMARY && new < now) - drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now); + if (bdev) + blk_stack_limits(&lim, &b->limits, 0); - if (new != now) - drbd_info(device, "max BIO size = %u\n", new); + /* + * If we can handle "zeroes" efficiently on the protocol, we want to do + * that, even if our backend does not announce max_write_zeroes_sectors + * itself. + */ + if (connection->agreed_features & DRBD_FF_WZEROES) + lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; + else + lim.max_write_zeroes_sectors = 0; + + if ((lim.discard_granularity >> SECTOR_SHIFT) > + lim.max_hw_discard_sectors) { + lim.discard_granularity = 0; + lim.max_hw_discard_sectors = 0; + } - drbd_setup_queue_param(device, bdev, new, o); + if (queue_limits_commit_update(q, &lim)) + drbd_err(device, "setting new queue limits failed\n"); } /* Starts the worker thread */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 287a8d1d3f70..e858e7e0383f 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1542,9 +1542,10 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_resource_state_change *resource_state_change, + void *state_change, enum drbd_notification_type type) { + struct drbd_resource_state_change *resource_state_change = state_change; struct drbd_resource *resource = resource_state_change->resource; struct resource_info resource_info = { .res_role = resource_state_change->role[NEW], @@ -1558,13 +1559,14 @@ int notify_resource_state_change(struct sk_buff *skb, int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_connection_state_change *connection_state_change, + void *state_change, enum drbd_notification_type type) { - struct drbd_connection *connection = connection_state_change->connection; + struct drbd_connection_state_change *p = state_change; + struct drbd_connection *connection = p->connection; struct connection_info connection_info = { - .conn_connection_state = connection_state_change->cstate[NEW], - .conn_role = connection_state_change->peer_role[NEW], + .conn_connection_state = p->cstate[NEW], + .conn_role = p->peer_role[NEW], }; return notify_connection_state(skb, seq, connection, &connection_info, type); @@ -1572,9 +1574,10 @@ int notify_connection_state_change(struct sk_buff *skb, int notify_device_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_device_state_change *device_state_change, + void *state_change, enum drbd_notification_type type) { + struct drbd_device_state_change *device_state_change = state_change; struct drbd_device *device = device_state_change->device; struct device_info device_info = { .dev_disk_state = device_state_change->disk_state[NEW], @@ -1585,9 +1588,10 @@ int notify_device_state_change(struct sk_buff *skb, int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, - struct drbd_peer_device_state_change *p, + void *state_change, enum drbd_notification_type type) { + struct drbd_peer_device_state_change *p = state_change; struct drbd_peer_device *peer_device = p->peer_device; struct peer_device_info peer_device_info = { .peer_repl_state = p->repl_state[NEW], @@ -1605,8 +1609,8 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - int (*last_func)(struct sk_buff *, unsigned int, void *, - enum drbd_notification_type) = NULL; + int (*last_func)(struct sk_buff *, unsigned int, + void *, enum drbd_notification_type) = NULL; void *last_arg = NULL; #define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW]) @@ -1616,7 +1620,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) }) #define REMEMBER_STATE_CHANGE(func, arg, type) \ ({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \ - last_func = (typeof(last_func))func; \ + last_func = func; \ last_arg = arg; \ }) diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index 9d78d8e3912e..a56a57d67686 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -46,19 +46,19 @@ extern void forget_state_change(struct drbd_state_change *); extern int notify_resource_state_change(struct sk_buff *, unsigned int, - struct drbd_resource_state_change *, + void *, enum drbd_notification_type type); extern int notify_connection_state_change(struct sk_buff *, unsigned int, - struct drbd_connection_state_change *, + void *, enum drbd_notification_type type); extern int notify_device_state_change(struct sk_buff *, unsigned int, - struct drbd_device_state_change *, + void *, enum drbd_notification_type type); extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, - struct drbd_peer_device_state_change *, + void *, enum drbd_notification_type type); #endif /* DRBD_STATE_CHANGE_H */ |