summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-07 09:19:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-07 09:19:14 -0700
commit513389809e138ae903b6ef43c1d5d2ffaf4dca17 (patch)
treec71e478fab1568da4706868b14eb67a75c148a8b /drivers/block
parent0a78a376ef3c2f3d397df48909f00cd75f92137a (diff)
parent30514bd2dd4e86a3ecfd6a93a3eadf7b9ea164a0 (diff)
Merge tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - NVMe pull requests via Christoph: - handle number of queue changes in the TCP and RDMA drivers (Daniel Wagner) - allow changing the number of queues in nvmet (Daniel Wagner) - also consider host_iface when checking ip options (Daniel Wagner) - don't map pages which can't come from HIGHMEM (Fabio M. De Francesco) - avoid unnecessary flush bios in nvmet (Guixin Liu) - shrink and better pack the nvme_iod structure (Keith Busch) - add comment for unaligned "fake" nqn (Linjun Bao) - print actual source IP address through sysfs "address" attr (Martin Belanger) - various cleanups (Jackie Liu, Wolfram Sang, Genjian Zhang) - handle effects after freeing the request (Keith Busch) - copy firmware_rev on each init (Keith Busch) - restrict management ioctls to admin (Keith Busch) - ensure subsystem reset is single threaded (Keith Busch) - report the actual number of tagset maps in nvme-pci (Keith Busch) - small fabrics authentication fixups (Christoph Hellwig) - add common code for tagset allocation and freeing (Christoph Hellwig) - stop using the request_queue in nvmet (Christoph Hellwig) - set min_align_mask before calculating max_hw_sectors (Rishabh Bhatnagar) - send a rediscover uevent when a persistent discovery controller reconnects (Sagi Grimberg) - misc nvmet-tcp fixes (Varun Prakash, zhenwei pi) - MD pull request via Song: - Various raid5 fix and clean up, by Logan Gunthorpe and David Sloan. - Raid10 performance optimization, by Yu Kuai. - sbitmap wakeup hang fixes (Hugh, Keith, Jan, Yu) - IO scheduler switching quisce fix (Keith) - s390/dasd block driver updates (Stefan) - support for recovery for the ublk driver (ZiyangZhang) - rnbd drivers fixes and updates (Guoqing, Santosh, ye, Christoph) - blk-mq and null_blk map fixes (Bart) - various bcache fixes (Coly, Jilin, Jules) - nbd signal hang fix (Shigeru) - block writeback throttling fix (Yu) - optimize the passthrough mapping handling (me) - prepare block cgroups to being gendisk based (Christoph) - get rid of an old PSI hack in the block layer, moving it to the callers instead where it belongs (Christoph) - blk-throttle fixes and cleanups (Yu) - misc fixes and cleanups (Liu Shixin, Liu Song, Miaohe, Pankaj, Ping-Xiang, Wolfram, Saurabh, Li Jinlin, Li Lei, Lin, Li zeming, Miaohe, Bart, Coly, Gaosheng * tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux: (162 commits) sbitmap: fix lockup while swapping block: add rationale for not using blk_mq_plug() when applicable block: adapt blk_mq_plug() to not plug for writes that require a zone lock s390/dasd: use blk_mq_alloc_disk blk-cgroup: don't update the blkg lookup hint in blkg_conf_prep nvmet: don't look at the request_queue in nvmet_bdev_set_limits nvmet: don't look at the request_queue in nvmet_bdev_zone_mgmt_emulate_all blk-mq: use quiesced elevator switch when reinitializing queues block: replace blk_queue_nowait with bdev_nowait nvme: remove nvme_ctrl_init_connect_q nvme-loop: use the tagset alloc/free helpers nvme-loop: store the generic nvme_ctrl in set->driver_data nvme-loop: initialize sqsize later nvme-fc: use the tagset alloc/free helpers nvme-fc: store the generic nvme_ctrl in set->driver_data nvme-fc: keep ctrl->sqsize in sync with opts->queue_size nvme-rdma: use the tagset alloc/free helpers nvme-rdma: store the generic nvme_ctrl in set->driver_data nvme-tcp: use the tagset alloc/free helpers nvme-tcp: store the generic nvme_ctrl in set->driver_data ...
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/aoe/aoeblk.c15
-rw-r--r--drivers/block/brd.c2
-rw-r--r--drivers/block/drbd/drbd_int.h1
-rw-r--r--drivers/block/drbd/drbd_nl.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c3
-rw-r--r--drivers/block/drbd/drbd_req.h2
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c12
-rw-r--r--drivers/block/nbd.c6
-rw-r--r--drivers/block/null_blk/main.c8
-rw-r--r--drivers/block/ps3vram.c2
-rw-r--r--drivers/block/rnbd/Makefile6
-rw-r--r--drivers/block/rnbd/rnbd-clt.c8
-rw-r--r--drivers/block/rnbd/rnbd-srv-dev.c43
-rw-r--r--drivers/block/rnbd/rnbd-srv-dev.h64
-rw-r--r--drivers/block/rnbd/rnbd-srv-trace.c17
-rw-r--r--drivers/block/rnbd/rnbd-srv-trace.h207
-rw-r--r--drivers/block/rnbd/rnbd-srv.c123
-rw-r--r--drivers/block/rnbd/rnbd-srv.h2
-rw-r--r--drivers/block/ublk_drv.c302
-rw-r--r--drivers/block/virtio_blk.c4
-rw-r--r--drivers/block/zram/zram_drv.c6
21 files changed, 598 insertions, 237 deletions
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 12b3ca8f6f4a..128722cf6c3c 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -108,7 +108,7 @@ static ssize_t aoedisk_show_payload(struct device *dev,
return sysfs_emit(page, "%lu\n", d->maxbcnt);
}
-static int aoedisk_debugfs_show(struct seq_file *s, void *ignored)
+static int aoe_debugfs_show(struct seq_file *s, void *ignored)
{
struct aoedev *d;
struct aoetgt **t, **te;
@@ -151,11 +151,7 @@ static int aoedisk_debugfs_show(struct seq_file *s, void *ignored)
return 0;
}
-
-static int aoe_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, aoedisk_debugfs_show, inode->i_private);
-}
+DEFINE_SHOW_ATTRIBUTE(aoe_debugfs);
static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
@@ -184,13 +180,6 @@ static const struct attribute_group *aoe_attr_groups[] = {
NULL,
};
-static const struct file_operations aoe_debugfs_fops = {
- .open = aoe_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static void
aoedisk_add_debugfs(struct aoedev *d)
{
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 859499cd1ff8..20acc4a1fd6d 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -397,7 +397,7 @@ static int brd_alloc(int i)
disk->minors = max_part;
disk->fops = &brd_fops;
disk->private_data = brd;
- strlcpy(disk->disk_name, buf, DISK_NAME_LEN);
+ strscpy(disk->disk_name, buf, DISK_NAME_LEN);
set_capacity(disk, rd_size * 2);
/*
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f15f2f041596..4d661282ff41 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1529,7 +1529,6 @@ extern int w_send_read_req(struct drbd_work *, int);
extern int w_e_reissue(struct drbd_work *, int);
extern int w_restart_disk_io(struct drbd_work *, int);
extern int w_send_out_of_sync(struct drbd_work *, int);
-extern int w_start_resync(struct drbd_work *, int);
extern void resync_timer_fn(struct timer_list *t);
extern void start_resync_timer_fn(struct timer_list *t);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 013d355a2033..864c98e74875 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -4752,7 +4752,7 @@ void notify_helper(enum drbd_notification_type type,
struct drbd_genlmsghdr *dh;
int err;
- strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
+ strscpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
helper_info.helper_status = status;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index af4c7d65490b..c897c4572036 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2113,9 +2113,6 @@ static int receive_DataReply(struct drbd_connection *connection, struct packet_i
if (unlikely(!req))
return -EIO;
- /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
- * special casing it there for the various failure cases.
- * still no race with drbd_fail_pending_reads */
err = recv_dless_read(peer_device, req, sector, pi->size);
if (!err)
req_mod(req, DATA_RECEIVED);
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 511f39a08de4..6237fa1dcb0e 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -266,8 +266,6 @@ struct bio_and_error {
extern void start_new_tl_epoch(struct drbd_connection *connection);
extern void drbd_req_destroy(struct kref *kref);
-extern void _req_may_be_done(struct drbd_request *req,
- struct bio_and_error *m);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m);
extern void complete_master_bio(struct drbd_device *device,
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 562725d222a7..815d77ba6381 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1397,15 +1397,15 @@ static void mtip_dump_identify(struct mtip_port *port)
if (!port->identify_valid)
return;
- strlcpy(cbuf, (char *)(port->identify+10), 21);
+ strscpy(cbuf, (char *)(port->identify + 10), 21);
dev_info(&port->dd->pdev->dev,
"Serial No.: %s\n", cbuf);
- strlcpy(cbuf, (char *)(port->identify+23), 9);
+ strscpy(cbuf, (char *)(port->identify + 23), 9);
dev_info(&port->dd->pdev->dev,
"Firmware Ver.: %s\n", cbuf);
- strlcpy(cbuf, (char *)(port->identify+27), 41);
+ strscpy(cbuf, (char *)(port->identify + 27), 41);
dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
dev_info(&port->dd->pdev->dev, "Security: %04x %s\n",
@@ -1421,13 +1421,13 @@ static void mtip_dump_identify(struct mtip_port *port)
pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
switch (revid & 0xFF) {
case 0x1:
- strlcpy(cbuf, "A0", 3);
+ strscpy(cbuf, "A0", 3);
break;
case 0x3:
- strlcpy(cbuf, "A2", 3);
+ strscpy(cbuf, "A2", 3);
break;
default:
- strlcpy(cbuf, "?", 2);
+ strscpy(cbuf, "?", 2);
break;
}
dev_info(&port->dd->pdev->dev,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 6cec9ce23fd3..4762a03e1ffe 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1413,10 +1413,12 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd)
mutex_unlock(&nbd->config_lock);
ret = wait_event_interruptible(config->recv_wq,
atomic_read(&config->recv_threads) == 0);
- if (ret)
+ if (ret) {
sock_shutdown(nbd);
- flush_workqueue(nbd->recv_workq);
+ nbd_clear_que(nbd);
+ }
+ flush_workqueue(nbd->recv_workq);
mutex_lock(&nbd->config_lock);
nbd_bdev_reset(nbd);
/* user requested, ignore socket errors */
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index c451c477978f..1f154f92f4c2 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1528,7 +1528,7 @@ static bool should_requeue_request(struct request *rq)
return false;
}
-static int null_map_queues(struct blk_mq_tag_set *set)
+static void null_map_queues(struct blk_mq_tag_set *set)
{
struct nullb *nullb = set->driver_data;
int i, qoff;
@@ -1555,7 +1555,9 @@ static int null_map_queues(struct blk_mq_tag_set *set)
} else {
pr_warn("tag set has unexpected nr_hw_queues: %d\n",
set->nr_hw_queues);
- return -EINVAL;
+ WARN_ON_ONCE(true);
+ submit_queues = 1;
+ poll_queues = 0;
}
}
@@ -1577,8 +1579,6 @@ static int null_map_queues(struct blk_mq_tag_set *set)
qoff += map->nr_queues;
blk_mq_map_queues(map);
}
-
- return 0;
}
static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index e1d080f680ed..c76e0148eada 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -745,7 +745,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
gendisk->flags |= GENHD_FL_NO_PART;
gendisk->fops = &ps3vram_fops;
gendisk->private_data = dev;
- strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
+ strscpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
set_capacity(gendisk, priv->size >> 9);
blk_queue_max_segments(gendisk->queue, BLK_MAX_SEGMENTS);
blk_queue_max_segment_size(gendisk->queue, BLK_MAX_SEGMENT_SIZE);
diff --git a/drivers/block/rnbd/Makefile b/drivers/block/rnbd/Makefile
index 5bb1a7ad1ada..40b31630822c 100644
--- a/drivers/block/rnbd/Makefile
+++ b/drivers/block/rnbd/Makefile
@@ -6,10 +6,12 @@ rnbd-client-y := rnbd-clt.o \
rnbd-clt-sysfs.o \
rnbd-common.o
+CFLAGS_rnbd-srv-trace.o = -I$(src)
+
rnbd-server-y := rnbd-common.o \
rnbd-srv.o \
- rnbd-srv-dev.o \
- rnbd-srv-sysfs.o
+ rnbd-srv-sysfs.o \
+ rnbd-srv-trace.o
obj-$(CONFIG_BLK_DEV_RNBD_CLIENT) += rnbd-client.o
obj-$(CONFIG_BLK_DEV_RNBD_SERVER) += rnbd-server.o
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 04da33a22ef4..78334da74d8b 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1159,13 +1159,11 @@ static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct rnbd_queue *q = hctx->driver_data;
struct rnbd_clt_dev *dev = q->dev;
- int cnt;
- cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
- return cnt;
+ return rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
}
-static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
+static void rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
{
struct rnbd_clt_session *sess = set->driver_data;
@@ -1194,8 +1192,6 @@ static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
set->map[HCTX_TYPE_DEFAULT].nr_queues,
set->map[HCTX_TYPE_READ].nr_queues);
}
-
- return 0;
}
static struct blk_mq_ops rnbd_mq_ops = {
diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c
deleted file mode 100644
index c63017f6e421..000000000000
--- a/drivers/block/rnbd/rnbd-srv-dev.c
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RDMA Network Block Driver
- *
- * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
- * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
- * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
- */
-#undef pr_fmt
-#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
-
-#include "rnbd-srv-dev.h"
-#include "rnbd-log.h"
-
-struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags)
-{
- struct rnbd_dev *dev;
- int ret;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return ERR_PTR(-ENOMEM);
-
- dev->blk_open_flags = flags;
- dev->bdev = blkdev_get_by_path(path, flags, THIS_MODULE);
- ret = PTR_ERR_OR_ZERO(dev->bdev);
- if (ret)
- goto err;
-
- dev->blk_open_flags = flags;
-
- return dev;
-
-err:
- kfree(dev);
- return ERR_PTR(ret);
-}
-
-void rnbd_dev_close(struct rnbd_dev *dev)
-{
- blkdev_put(dev->bdev, dev->blk_open_flags);
- kfree(dev);
-}
diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h
deleted file mode 100644
index 8407d12f70af..000000000000
--- a/drivers/block/rnbd/rnbd-srv-dev.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * RDMA Network Block Driver
- *
- * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
- * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
- * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
- */
-#ifndef RNBD_SRV_DEV_H
-#define RNBD_SRV_DEV_H
-
-#include <linux/fs.h>
-#include "rnbd-proto.h"
-
-struct rnbd_dev {
- struct block_device *bdev;
- fmode_t blk_open_flags;
-};
-
-/**
- * rnbd_dev_open() - Open a device
- * @path: path to open
- * @flags: open flags
- */
-struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags);
-
-/**
- * rnbd_dev_close() - Close a device
- */
-void rnbd_dev_close(struct rnbd_dev *dev);
-
-void rnbd_endio(void *priv, int error);
-
-static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev)
-{
- return queue_max_segments(bdev_get_queue(dev->bdev));
-}
-
-static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev)
-{
- return queue_max_hw_sectors(bdev_get_queue(dev->bdev));
-}
-
-static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev)
-{
- return bdev_max_secure_erase_sectors(dev->bdev);
-}
-
-static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev)
-{
- return bdev_max_discard_sectors(dev->bdev);
-}
-
-static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
-{
- return bdev_get_queue(dev->bdev)->limits.discard_granularity;
-}
-
-static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
-{
- return bdev_discard_alignment(dev->bdev);
-}
-
-#endif /* RNBD_SRV_DEV_H */
diff --git a/drivers/block/rnbd/rnbd-srv-trace.c b/drivers/block/rnbd/rnbd-srv-trace.c
new file mode 100644
index 000000000000..30f0895c18f5
--- /dev/null
+++ b/drivers/block/rnbd/rnbd-srv-trace.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2022 1&1 IONOS SE. All rights reserved.
+ */
+#include "rtrs.h"
+#include "rtrs-srv.h"
+#include "rnbd-srv.h"
+#include "rnbd-proto.h"
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "rnbd-srv-trace.h"
diff --git a/drivers/block/rnbd/rnbd-srv-trace.h b/drivers/block/rnbd/rnbd-srv-trace.h
new file mode 100644
index 000000000000..8dedf73bdd28
--- /dev/null
+++ b/drivers/block/rnbd/rnbd-srv-trace.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2022 1&1 IONOS SE. All rights reserved.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rnbd_srv
+
+#if !defined(_TRACE_RNBD_SRV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RNBD_SRV_H
+
+#include <linux/tracepoint.h>
+
+struct rnbd_srv_session;
+struct rtrs_srv_op;
+
+DECLARE_EVENT_CLASS(rnbd_srv_link_class,
+ TP_PROTO(struct rnbd_srv_session *srv),
+
+ TP_ARGS(srv),
+
+ TP_STRUCT__entry(
+ __field(int, qdepth)
+ __string(sessname, srv->sessname)
+ ),
+
+ TP_fast_assign(
+ __entry->qdepth = srv->queue_depth;
+ __assign_str(sessname, srv->sessname);
+ ),
+
+ TP_printk("sessname: %s qdepth: %d",
+ __get_str(sessname),
+ __entry->qdepth
+ )
+);
+
+#define DEFINE_LINK_EVENT(name) \
+DEFINE_EVENT(rnbd_srv_link_class, name, \
+ TP_PROTO(struct rnbd_srv_session *srv), \
+ TP_ARGS(srv))
+
+DEFINE_LINK_EVENT(create_sess);
+DEFINE_LINK_EVENT(destroy_sess);
+
+TRACE_DEFINE_ENUM(RNBD_OP_READ);
+TRACE_DEFINE_ENUM(RNBD_OP_WRITE);
+TRACE_DEFINE_ENUM(RNBD_OP_FLUSH);
+TRACE_DEFINE_ENUM(RNBD_OP_DISCARD);
+TRACE_DEFINE_ENUM(RNBD_OP_SECURE_ERASE);
+TRACE_DEFINE_ENUM(RNBD_F_SYNC);
+TRACE_DEFINE_ENUM(RNBD_F_FUA);
+
+#define show_rnbd_rw_flags(x) \
+ __print_flags(x, "|", \
+ { RNBD_OP_READ, "READ" }, \
+ { RNBD_OP_WRITE, "WRITE" }, \
+ { RNBD_OP_FLUSH, "FLUSH" }, \
+ { RNBD_OP_DISCARD, "DISCARD" }, \
+ { RNBD_OP_SECURE_ERASE, "SECURE_ERASE" }, \
+ { RNBD_F_SYNC, "SYNC" }, \
+ { RNBD_F_FUA, "FUA" })
+
+TRACE_EVENT(process_rdma,
+ TP_PROTO(struct rnbd_srv_session *srv,
+ const struct rnbd_msg_io *msg,
+ struct rtrs_srv_op *id,
+ u32 datalen,
+ size_t usrlen),
+
+ TP_ARGS(srv, msg, id, datalen, usrlen),
+
+ TP_STRUCT__entry(
+ __string(sessname, srv->sessname)
+ __field(u8, dir)
+ __field(u8, ver)
+ __field(u32, device_id)
+ __field(u64, sector)
+ __field(u32, flags)
+ __field(u32, bi_size)
+ __field(u16, ioprio)
+ __field(u32, datalen)
+ __field(size_t, usrlen)
+ ),
+
+ TP_fast_assign(
+ __assign_str(sessname, srv->sessname);
+ __entry->dir = id->dir;
+ __entry->ver = srv->ver;
+ __entry->device_id = le32_to_cpu(msg->device_id);
+ __entry->sector = le64_to_cpu(msg->sector);
+ __entry->bi_size = le32_to_cpu(msg->bi_size);
+ __entry->flags = le32_to_cpu(msg->rw);
+ __entry->ioprio = le16_to_cpu(msg->prio);
+ __entry->datalen = datalen;
+ __entry->usrlen = usrlen;
+ ),
+
+ TP_printk("I/O req: sess: %s, type: %s, ver: %d, devid: %u, sector: %llu, bsize: %u, flags: %s, ioprio: %d, datalen: %u, usrlen: %zu",
+ __get_str(sessname),
+ __print_symbolic(__entry->dir,
+ { READ, "READ" },
+ { WRITE, "WRITE" }),
+ __entry->ver,
+ __entry->device_id,
+ __entry->sector,
+ __entry->bi_size,
+ show_rnbd_rw_flags(__entry->flags),
+ __entry->ioprio,
+ __entry->datalen,
+ __entry->usrlen
+ )
+);
+
+TRACE_EVENT(process_msg_sess_info,
+ TP_PROTO(struct rnbd_srv_session *srv,
+ const struct rnbd_msg_sess_info *msg),
+
+ TP_ARGS(srv, msg),
+
+ TP_STRUCT__entry(
+ __field(u8, proto_ver)
+ __field(u8, clt_ver)
+ __field(u8, srv_ver)
+ __string(sessname, srv->sessname)
+ ),
+
+ TP_fast_assign(
+ __entry->proto_ver = srv->ver;
+ __entry->clt_ver = msg->ver;
+ __entry->srv_ver = RNBD_PROTO_VER_MAJOR;
+ __assign_str(sessname, srv->sessname);
+ ),
+
+ TP_printk("Session %s using proto-ver %d (clt-ver: %d, srv-ver: %d)",
+ __get_str(sessname),
+ __entry->proto_ver,
+ __entry->clt_ver,
+ __entry->srv_ver
+ )
+);
+
+TRACE_DEFINE_ENUM(RNBD_ACCESS_RO);
+TRACE_DEFINE_ENUM(RNBD_ACCESS_RW);
+TRACE_DEFINE_ENUM(RNBD_ACCESS_MIGRATION);
+
+#define show_rnbd_access_mode(x) \
+ __print_symbolic(x, \
+ { RNBD_ACCESS_RO, "RO" }, \
+ { RNBD_ACCESS_RW, "RW" }, \
+ { RNBD_ACCESS_MIGRATION, "MIGRATION" })
+
+TRACE_EVENT(process_msg_open,
+ TP_PROTO(struct rnbd_srv_session *srv,
+ const struct rnbd_msg_open *msg),
+
+ TP_ARGS(srv, msg),
+
+ TP_STRUCT__entry(
+ __field(u8, access_mode)
+ __string(sessname, srv->sessname)
+ __string(dev_name, msg->dev_name)
+ ),
+
+ TP_fast_assign(
+ __entry->access_mode = msg->access_mode;
+ __assign_str(sessname, srv->sessname);
+ __assign_str(dev_name, msg->dev_name);
+ ),
+
+ TP_printk("Open message received: session='%s' path='%s' access_mode=%s",
+ __get_str(sessname),
+ __get_str(dev_name),
+ show_rnbd_access_mode(__entry->access_mode)
+ )
+);
+
+TRACE_EVENT(process_msg_close,
+ TP_PROTO(struct rnbd_srv_session *srv,
+ const struct rnbd_msg_close *msg),
+
+ TP_ARGS(srv, msg),
+
+ TP_STRUCT__entry(
+ __field(u32, device_id)
+ __string(sessname, srv->sessname)
+ ),
+
+ TP_fast_assign(
+ __entry->device_id = le32_to_cpu(msg->device_id);
+ __assign_str(sessname, srv->sessname);
+ ),
+
+ TP_printk("Close message received: session='%s' device id='%d'",
+ __get_str(sessname),
+ __entry->device_id
+ )
+);
+
+#endif /* _TRACE_RNBD_SRV_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE rnbd-srv-trace
+#include <trace/define_trace.h>
+
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 5e08da277ddf..08b041159cd3 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -13,7 +13,7 @@
#include <linux/blkdev.h>
#include "rnbd-srv.h"
-#include "rnbd-srv-dev.h"
+#include "rnbd-srv-trace.h"
MODULE_DESCRIPTION("RDMA Network Block Device Server");
MODULE_LICENSE("GPL");
@@ -84,18 +84,6 @@ static inline void rnbd_put_sess_dev(struct rnbd_srv_sess_dev *sess_dev)
kref_put(&sess_dev->kref, rnbd_sess_dev_release);
}
-void rnbd_endio(void *priv, int error)
-{
- struct rnbd_io_private *rnbd_priv = priv;
- struct rnbd_srv_sess_dev *sess_dev = rnbd_priv->sess_dev;
-
- rnbd_put_sess_dev(sess_dev);
-
- rtrs_srv_resp_rdma(rnbd_priv->id, error);
-
- kfree(priv);
-}
-
static struct rnbd_srv_sess_dev *
rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
{
@@ -116,7 +104,13 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
static void rnbd_dev_bi_end_io(struct bio *bio)
{
- rnbd_endio(bio->bi_private, blk_status_to_errno(bio->bi_status));
+ struct rnbd_io_private *rnbd_priv = bio->bi_private;
+ struct rnbd_srv_sess_dev *sess_dev = rnbd_priv->sess_dev;
+
+ rnbd_put_sess_dev(sess_dev);
+ rtrs_srv_resp_rdma(rnbd_priv->id, blk_status_to_errno(bio->bi_status));
+
+ kfree(rnbd_priv);
bio_put(bio);
}
@@ -132,6 +126,8 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
struct bio *bio;
short prio;
+ trace_process_rdma(srv_sess, msg, id, datalen, usrlen);
+
priv = kmalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
@@ -149,7 +145,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
priv->sess_dev = sess_dev;
priv->id = id;
- bio = bio_alloc(sess_dev->rnbd_dev->bdev, 1,
+ bio = bio_alloc(sess_dev->bdev, 1,
rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
if (bio_add_page(bio, virt_to_page(data), datalen,
offset_in_page(data)) != datalen) {
@@ -223,7 +219,7 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
rnbd_put_sess_dev(sess_dev);
wait_for_completion(&dc); /* wait for inflights to drop to zero */
- rnbd_dev_close(sess_dev->rnbd_dev);
+ blkdev_put(sess_dev->bdev, sess_dev->open_flags);
mutex_lock(&sess_dev->dev->lock);
list_del(&sess_dev->dev_list);
if (sess_dev->open_flags & FMODE_WRITE)
@@ -244,6 +240,8 @@ static void destroy_sess(struct rnbd_srv_session *srv_sess)
if (xa_empty(&srv_sess->index_idr))
goto out;
+ trace_destroy_sess(srv_sess);
+
mutex_lock(&srv_sess->lock);
xa_for_each(&srv_sess->index_idr, index, sess_dev)
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
@@ -290,6 +288,8 @@ static int create_sess(struct rtrs_srv_sess *rtrs)
rtrs_srv_set_sess_priv(rtrs, srv_sess);
+ trace_create_sess(srv_sess);
+
return 0;
}
@@ -332,23 +332,24 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
mutex_unlock(&sess->lock);
}
-static int process_msg_close(struct rnbd_srv_session *srv_sess,
+static void process_msg_close(struct rnbd_srv_session *srv_sess,
void *data, size_t datalen, const void *usr,
size_t usrlen)
{
const struct rnbd_msg_close *close_msg = usr;
struct rnbd_srv_sess_dev *sess_dev;
+ trace_process_msg_close(srv_sess, close_msg);
+
sess_dev = rnbd_get_sess_dev(le32_to_cpu(close_msg->device_id),
srv_sess);
if (IS_ERR(sess_dev))
- return 0;
+ return;
rnbd_put_sess_dev(sess_dev);
mutex_lock(&srv_sess->lock);
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
mutex_unlock(&srv_sess->lock);
- return 0;
}
static int process_msg_open(struct rnbd_srv_session *srv_sess,
@@ -378,7 +379,7 @@ static int rnbd_srv_rdma_ev(void *priv,
case RNBD_MSG_IO:
return process_rdma(srv_sess, id, data, datalen, usr, usrlen);
case RNBD_MSG_CLOSE:
- ret = process_msg_close(srv_sess, data, datalen, usr, usrlen);
+ process_msg_close(srv_sess, data, datalen, usr, usrlen);
break;
case RNBD_MSG_OPEN:
ret = process_msg_open(srv_sess, usr, usrlen, data, datalen);
@@ -393,6 +394,11 @@ static int rnbd_srv_rdma_ev(void *priv,
return -EINVAL;
}
+ /*
+ * Since ret is passed to rtrs to handle the failure case, we
+ * just return 0 at the end otherwise callers in rtrs would call
+ * send_io_resp_imm again to print redundant err message.
+ */
rtrs_srv_resp_rdma(id, ret);
return 0;
}
@@ -504,14 +510,14 @@ static int rnbd_srv_check_update_open_perm(struct rnbd_srv_dev *srv_dev,
}
static struct rnbd_srv_dev *
-rnbd_srv_get_or_create_srv_dev(struct rnbd_dev *rnbd_dev,
+rnbd_srv_get_or_create_srv_dev(struct block_device *bdev,
struct rnbd_srv_session *srv_sess,
enum rnbd_access_mode access_mode)
{
int ret;
struct rnbd_srv_dev *new_dev, *dev;
- new_dev = rnbd_srv_init_srv_dev(rnbd_dev->bdev);
+ new_dev = rnbd_srv_init_srv_dev(bdev);
if (IS_ERR(new_dev))
return new_dev;
@@ -531,41 +537,32 @@ rnbd_srv_get_or_create_srv_dev(struct rnbd_dev *rnbd_dev,
static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
struct rnbd_srv_sess_dev *sess_dev)
{
- struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev;
+ struct block_device *bdev = sess_dev->bdev;
rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
- rsp->device_id =
- cpu_to_le32(sess_dev->device_id);
- rsp->nsectors =
- cpu_to_le64(get_capacity(rnbd_dev->bdev->bd_disk));
- rsp->logical_block_size =
- cpu_to_le16(bdev_logical_block_size(rnbd_dev->bdev));
- rsp->physical_block_size =
- cpu_to_le16(bdev_physical_block_size(rnbd_dev->bdev));
- rsp->max_segments =
- cpu_to_le16(rnbd_dev_get_max_segs(rnbd_dev));
+ rsp->device_id = cpu_to_le32(sess_dev->device_id);
+ rsp->nsectors = cpu_to_le64(bdev_nr_sectors(bdev));
+ rsp->logical_block_size = cpu_to_le16(bdev_logical_block_size(bdev));
+ rsp->physical_block_size = cpu_to_le16(bdev_physical_block_size(bdev));
+ rsp->max_segments = cpu_to_le16(bdev_max_segments(bdev));
rsp->max_hw_sectors =
- cpu_to_le32(rnbd_dev_get_max_hw_sects(rnbd_dev));
+ cpu_to_le32(queue_max_hw_sectors(bdev_get_queue(bdev)));
rsp->max_write_same_sectors = 0;
- rsp->max_discard_sectors =
- cpu_to_le32(rnbd_dev_get_max_discard_sects(rnbd_dev));
- rsp->discard_granularity =
- cpu_to_le32(rnbd_dev_get_discard_granularity(rnbd_dev));
- rsp->discard_alignment =
- cpu_to_le32(rnbd_dev_get_discard_alignment(rnbd_dev));
- rsp->secure_discard =
- cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
+ rsp->max_discard_sectors = cpu_to_le32(bdev_max_discard_sectors(bdev));
+ rsp->discard_granularity = cpu_to_le32(bdev_discard_granularity(bdev));
+ rsp->discard_alignment = cpu_to_le32(bdev_discard_alignment(bdev));
+ rsp->secure_discard = cpu_to_le16(bdev_max_secure_erase_sectors(bdev));
rsp->cache_policy = 0;
- if (bdev_write_cache(rnbd_dev->bdev))
+ if (bdev_write_cache(bdev))
rsp->cache_policy |= RNBD_WRITEBACK;
- if (bdev_fua(rnbd_dev->bdev))
+ if (bdev_fua(bdev))
rsp->cache_policy |= RNBD_FUA;
}
static struct rnbd_srv_sess_dev *
rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
const struct rnbd_msg_open *open_msg,
- struct rnbd_dev *rnbd_dev, fmode_t open_flags,
+ struct block_device *bdev, fmode_t open_flags,
struct rnbd_srv_dev *srv_dev)
{
struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess);
@@ -577,7 +574,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
strscpy(sdev->pathname, open_msg->dev_name, sizeof(sdev->pathname));
- sdev->rnbd_dev = rnbd_dev;
+ sdev->bdev = bdev;
sdev->sess = srv_sess;
sdev->dev = srv_dev;
sdev->open_flags = open_flags;
@@ -643,9 +640,8 @@ static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
struct rnbd_msg_sess_info_rsp *rsp = data;
srv_sess->ver = min_t(u8, sess_info_msg->ver, RNBD_PROTO_VER_MAJOR);
- pr_debug("Session %s using protocol version %d (client version: %d, server version: %d)\n",
- srv_sess->sessname, srv_sess->ver,
- sess_info_msg->ver, RNBD_PROTO_VER_MAJOR);
+
+ trace_process_msg_sess_info(srv_sess, sess_info_msg);
rsp->hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO_RSP);
rsp->ver = srv_sess->ver;
@@ -685,14 +681,13 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
struct rnbd_srv_dev *srv_dev;
struct rnbd_srv_sess_dev *srv_sess_dev;
const struct rnbd_msg_open *open_msg = msg;
+ struct block_device *bdev;
fmode_t open_flags;
char *full_path;
- struct rnbd_dev *rnbd_dev;
struct rnbd_msg_open_rsp *rsp = data;
- pr_debug("Open message received: session='%s' path='%s' access_mode=%d\n",
- srv_sess->sessname, open_msg->dev_name,
- open_msg->access_mode);
+ trace_process_msg_open(srv_sess, open_msg);
+
open_flags = FMODE_READ;
if (open_msg->access_mode != RNBD_ACCESS_RO)
open_flags |= FMODE_WRITE;
@@ -725,25 +720,25 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto reject;
}
- rnbd_dev = rnbd_dev_open(full_path, open_flags);
- if (IS_ERR(rnbd_dev)) {
- pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %ld\n",
- full_path, srv_sess->sessname, PTR_ERR(rnbd_dev));
- ret = PTR_ERR(rnbd_dev);
+ bdev = blkdev_get_by_path(full_path, open_flags, THIS_MODULE);
+ if (IS_ERR(bdev)) {
+ ret = PTR_ERR(bdev);
+ pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %d\n",
+ full_path, srv_sess->sessname, ret);
goto free_path;
}
- srv_dev = rnbd_srv_get_or_create_srv_dev(rnbd_dev, srv_sess,
+ srv_dev = rnbd_srv_get_or_create_srv_dev(bdev, srv_sess,
open_msg->access_mode);
if (IS_ERR(srv_dev)) {
pr_err("Opening device '%s' on session %s failed, creating srv_dev failed, err: %ld\n",
full_path, srv_sess->sessname, PTR_ERR(srv_dev));
ret = PTR_ERR(srv_dev);
- goto rnbd_dev_close;
+ goto blkdev_put;
}
srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg,
- rnbd_dev, open_flags,
+ bdev, open_flags,
srv_dev);
if (IS_ERR(srv_sess_dev)) {
pr_err("Opening device '%s' on session %s failed, creating sess_dev failed, err: %ld\n",
@@ -758,7 +753,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
*/
mutex_lock(&srv_dev->lock);
if (!srv_dev->dev_kobj.state_in_sysfs) {
- ret = rnbd_srv_create_dev_sysfs(srv_dev, rnbd_dev->bdev);
+ ret = rnbd_srv_create_dev_sysfs(srv_dev, bdev);
if (ret) {
mutex_unlock(&srv_dev->lock);
rnbd_srv_err(srv_sess_dev,
@@ -800,8 +795,8 @@ srv_dev_put:
mutex_unlock(&srv_dev->lock);
}
rnbd_put_srv_dev(srv_dev);
-rnbd_dev_close:
- rnbd_dev_close(rnbd_dev);
+blkdev_put:
+ blkdev_put(bdev, open_flags);
free_path:
kfree(full_path);
reject:
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index 081bceaf4ae9..f5962fd31d62 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -46,7 +46,7 @@ struct rnbd_srv_dev {
struct rnbd_srv_sess_dev {
/* Entry inside rnbd_srv_dev struct */
struct list_head dev_list;
- struct rnbd_dev *rnbd_dev;
+ struct block_device *bdev;
struct rnbd_srv_session *sess;
struct rnbd_srv_dev *dev;
struct kobject kobj;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 6a4a94b4cdf4..2651bf41dde3 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -49,7 +49,9 @@
/* All UBLK_F_* have to be included into UBLK_F_ALL */
#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
| UBLK_F_URING_CMD_COMP_IN_TASK \
- | UBLK_F_NEED_GET_DATA)
+ | UBLK_F_NEED_GET_DATA \
+ | UBLK_F_USER_RECOVERY \
+ | UBLK_F_USER_RECOVERY_REISSUE)
/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
@@ -119,7 +121,7 @@ struct ublk_queue {
unsigned long io_addr; /* mapped vm address */
unsigned int max_io_sz;
- bool abort_work_pending;
+ bool force_abort;
unsigned short nr_io_ready; /* how many ios setup */
struct ublk_device *dev;
struct ublk_io ios[0];
@@ -161,6 +163,7 @@ struct ublk_device {
* monitor each queue's daemon periodically
*/
struct delayed_work monitor_work;
+ struct work_struct quiesce_work;
struct work_struct stop_work;
};
@@ -323,6 +326,30 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
PAGE_SIZE);
}
+static inline bool ublk_queue_can_use_recovery_reissue(
+ struct ublk_queue *ubq)
+{
+ if ((ubq->flags & UBLK_F_USER_RECOVERY) &&
+ (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE))
+ return true;
+ return false;
+}
+
+static inline bool ublk_queue_can_use_recovery(
+ struct ublk_queue *ubq)
+{
+ if (ubq->flags & UBLK_F_USER_RECOVERY)
+ return true;
+ return false;
+}
+
+static inline bool ublk_can_use_recovery(struct ublk_device *ub)
+{
+ if (ub->dev_info.flags & UBLK_F_USER_RECOVERY)
+ return true;
+ return false;
+}
+
static void ublk_free_disk(struct gendisk *disk)
{
struct ublk_device *ub = disk->private_data;
@@ -612,13 +639,17 @@ static void ublk_complete_rq(struct request *req)
* Also aborting may not be started yet, keep in mind that one failed
* request may be issued by block layer again.
*/
-static void __ublk_fail_req(struct ublk_io *io, struct request *req)
+static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
+ struct request *req)
{
WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE);
if (!(io->flags & UBLK_IO_FLAG_ABORTED)) {
io->flags |= UBLK_IO_FLAG_ABORTED;
- blk_mq_end_request(req, BLK_STS_IOERR);
+ if (ublk_queue_can_use_recovery_reissue(ubq))
+ blk_mq_requeue_request(req, false);
+ else
+ blk_mq_end_request(req, BLK_STS_IOERR);
}
}
@@ -639,22 +670,40 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res)
#define UBLK_REQUEUE_DELAY_MS 3
+static inline void __ublk_abort_rq(struct ublk_queue *ubq,
+ struct request *rq)
+{
+ /* We cannot process this rq so just requeue it. */
+ if (ublk_queue_can_use_recovery(ubq))
+ blk_mq_requeue_request(rq, false);
+ else
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+
+ mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
+}
+
static inline void __ublk_rq_task_work(struct request *req)
{
struct ublk_queue *ubq = req->mq_hctx->driver_data;
- struct ublk_device *ub = ubq->dev;
int tag = req->tag;
struct ublk_io *io = &ubq->ios[tag];
- bool task_exiting = current != ubq->ubq_daemon || ubq_daemon_is_dying(ubq);
unsigned int mapped_bytes;
pr_devel("%s: complete: op %d, qid %d tag %d io_flags %x addr %llx\n",
__func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags,
ublk_get_iod(ubq, req->tag)->addr);
- if (unlikely(task_exiting)) {
- blk_mq_end_request(req, BLK_STS_IOERR);
- mod_delayed_work(system_wq, &ub->monitor_work, 0);
+ /*
+ * Task is exiting if either:
+ *
+ * (1) current != ubq_daemon.
+ * io_uring_cmd_complete_in_task() tries to run task_work
+ * in a workqueue if ubq_daemon(cmd's task) is PF_EXITING.
+ *
+ * (2) current->flags & PF_EXITING.
+ */
+ if (unlikely(current != ubq->ubq_daemon || current->flags & PF_EXITING)) {
+ __ublk_abort_rq(ubq, req);
return;
}
@@ -739,13 +788,24 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
res = ublk_setup_iod(ubq, rq);
if (unlikely(res != BLK_STS_OK))
return BLK_STS_IOERR;
+ /* With recovery feature enabled, force_abort is set in
+ * ublk_stop_dev() before calling del_gendisk(). We have to
+ * abort all requeued and new rqs here to let del_gendisk()
+ * move on. Besides, we cannot not call io_uring_cmd_complete_in_task()
+ * to avoid UAF on io_uring ctx.
+ *
+ * Note: force_abort is guaranteed to be seen because it is set
+ * before request queue is unqiuesced.
+ */
+ if (ublk_queue_can_use_recovery(ubq) && unlikely(ubq->force_abort))
+ return BLK_STS_IOERR;
blk_mq_start_request(bd->rq);
if (unlikely(ubq_daemon_is_dying(ubq))) {
fail:
- mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
- return BLK_STS_IOERR;
+ __ublk_abort_rq(ubq, rq);
+ return BLK_STS_OK;
}
if (ublk_can_use_task_work(ubq)) {
@@ -916,7 +976,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
*/
rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i);
if (rq)
- __ublk_fail_req(io, rq);
+ __ublk_fail_req(ubq, io, rq);
}
}
ublk_put_device(ub);
@@ -932,7 +992,10 @@ static void ublk_daemon_monitor_work(struct work_struct *work)
struct ublk_queue *ubq = ublk_get_queue(ub, i);
if (ubq_daemon_is_dying(ubq)) {
- schedule_work(&ub->stop_work);
+ if (ublk_queue_can_use_recovery(ubq))
+ schedule_work(&ub->quiesce_work);
+ else
+ schedule_work(&ub->stop_work);
/* abort queue is for making forward progress */
ublk_abort_queue(ub, ubq);
@@ -940,12 +1003,13 @@ static void ublk_daemon_monitor_work(struct work_struct *work)
}
/*
- * We can't schedule monitor work after ublk_remove() is started.
+ * We can't schedule monitor work after ub's state is not UBLK_S_DEV_LIVE.
+ * after ublk_remove() or __ublk_quiesce_dev() is started.
*
* No need ub->mutex, monitor work are canceled after state is marked
- * as DEAD, so DEAD state is observed reliably.
+ * as not LIVE, so new state is observed reliably.
*/
- if (ub->dev_info.state != UBLK_S_DEV_DEAD)
+ if (ub->dev_info.state == UBLK_S_DEV_LIVE)
schedule_delayed_work(&ub->monitor_work,
UBLK_DAEMON_MONITOR_PERIOD);
}
@@ -982,12 +1046,97 @@ static void ublk_cancel_dev(struct ublk_device *ub)
ublk_cancel_queue(ublk_get_queue(ub, i));
}
-static void ublk_stop_dev(struct ublk_device *ub)
+static bool ublk_check_inflight_rq(struct request *rq, void *data)
+{
+ bool *idle = data;
+
+ if (blk_mq_request_started(rq)) {
+ *idle = false;
+ return false;
+ }
+ return true;
+}
+
+static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub)
{
+ bool idle;
+
+ WARN_ON_ONCE(!blk_queue_quiesced(ub->ub_disk->queue));
+ while (true) {
+ idle = true;
+ blk_mq_tagset_busy_iter(&ub->tag_set,
+ ublk_check_inflight_rq, &idle);
+ if (idle)
+ break;
+ msleep(UBLK_REQUEUE_DELAY_MS);
+ }
+}
+
+static void __ublk_quiesce_dev(struct ublk_device *ub)
+{
+ pr_devel("%s: quiesce ub: dev_id %d state %s\n",
+ __func__, ub->dev_info.dev_id,
+ ub->dev_info.state == UBLK_S_DEV_LIVE ?
+ "LIVE" : "QUIESCED");
+ blk_mq_quiesce_queue(ub->ub_disk->queue);
+ ublk_wait_tagset_rqs_idle(ub);
+ ub->dev_info.state = UBLK_S_DEV_QUIESCED;
+ ublk_cancel_dev(ub);
+ /* we are going to release task_struct of ubq_daemon and resets
+ * ->ubq_daemon to NULL. So in monitor_work, check on ubq_daemon causes UAF.
+ * Besides, monitor_work is not necessary in QUIESCED state since we have
+ * already scheduled quiesce_work and quiesced all ubqs.
+ *
+ * Do not let monitor_work schedule itself if state it QUIESCED. And we cancel
+ * it here and re-schedule it in END_USER_RECOVERY to avoid UAF.
+ */
+ cancel_delayed_work_sync(&ub->monitor_work);
+}
+
+static void ublk_quiesce_work_fn(struct work_struct *work)
+{
+ struct ublk_device *ub =
+ container_of(work, struct ublk_device, quiesce_work);
+
mutex_lock(&ub->mutex);
if (ub->dev_info.state != UBLK_S_DEV_LIVE)
goto unlock;
+ __ublk_quiesce_dev(ub);
+ unlock:
+ mutex_unlock(&ub->mutex);
+}
+
+static void ublk_unquiesce_dev(struct ublk_device *ub)
+{
+ int i;
+
+ pr_devel("%s: unquiesce ub: dev_id %d state %s\n",
+ __func__, ub->dev_info.dev_id,
+ ub->dev_info.state == UBLK_S_DEV_LIVE ?
+ "LIVE" : "QUIESCED");
+ /* quiesce_work has run. We let requeued rqs be aborted
+ * before running fallback_wq. "force_abort" must be seen
+ * after request queue is unqiuesced. Then del_gendisk()
+ * can move on.
+ */
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+ ublk_get_queue(ub, i)->force_abort = true;
+
+ blk_mq_unquiesce_queue(ub->ub_disk->queue);
+ /* We may have requeued some rqs in ublk_quiesce_queue() */
+ blk_mq_kick_requeue_list(ub->ub_disk->queue);
+}
+static void ublk_stop_dev(struct ublk_device *ub)
+{
+ mutex_lock(&ub->mutex);
+ if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+ goto unlock;
+ if (ublk_can_use_recovery(ub)) {
+ if (ub->dev_info.state == UBLK_S_DEV_LIVE)
+ __ublk_quiesce_dev(ub);
+ ublk_unquiesce_dev(ub);
+ }
del_gendisk(ub->ub_disk);
ub->dev_info.state = UBLK_S_DEV_DEAD;
ub->dev_info.ublksrv_pid = -1;
@@ -1311,6 +1460,7 @@ static void ublk_remove(struct ublk_device *ub)
{
ublk_stop_dev(ub);
cancel_work_sync(&ub->stop_work);
+ cancel_work_sync(&ub->quiesce_work);
cdev_device_del(&ub->cdev, &ub->cdev_dev);
put_device(&ub->cdev_dev);
}
@@ -1487,6 +1637,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
goto out_unlock;
mutex_init(&ub->mutex);
spin_lock_init(&ub->mm_lock);
+ INIT_WORK(&ub->quiesce_work, ublk_quiesce_work_fn);
INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work);
@@ -1607,6 +1758,7 @@ static int ublk_ctrl_stop_dev(struct io_uring_cmd *cmd)
ublk_stop_dev(ub);
cancel_work_sync(&ub->stop_work);
+ cancel_work_sync(&ub->quiesce_work);
ublk_put_device(ub);
return 0;
@@ -1709,6 +1861,116 @@ static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
return ret;
}
+static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
+{
+ int i;
+
+ WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq)));
+ /* All old ioucmds have to be completed */
+ WARN_ON_ONCE(ubq->nr_io_ready);
+ /* old daemon is PF_EXITING, put it now */
+ put_task_struct(ubq->ubq_daemon);
+ /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
+ ubq->ubq_daemon = NULL;
+
+ for (i = 0; i < ubq->q_depth; i++) {
+ struct ublk_io *io = &ubq->ios[i];
+
+ /* forget everything now and be ready for new FETCH_REQ */
+ io->flags = 0;
+ io->cmd = NULL;
+ io->addr = 0;
+ }
+}
+
+static int ublk_ctrl_start_recovery(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ struct ublk_device *ub;
+ int ret = -EINVAL;
+ int i;
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return ret;
+
+ mutex_lock(&ub->mutex);
+ if (!ublk_can_use_recovery(ub))
+ goto out_unlock;
+ /*
+ * START_RECOVERY is only allowd after:
+ *
+ * (1) UB_STATE_OPEN is not set, which means the dying process is exited
+ * and related io_uring ctx is freed so file struct of /dev/ublkcX is
+ * released.
+ *
+ * (2) UBLK_S_DEV_QUIESCED is set, which means the quiesce_work:
+ * (a)has quiesced request queue
+ * (b)has requeued every inflight rqs whose io_flags is ACTIVE
+ * (c)has requeued/aborted every inflight rqs whose io_flags is NOT ACTIVE
+ * (d)has completed/camceled all ioucmds owned by ther dying process
+ */
+ if (test_bit(UB_STATE_OPEN, &ub->state) ||
+ ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id);
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
+ ublk_queue_reinit(ub, ublk_get_queue(ub, i));
+ /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
+ ub->mm = NULL;
+ ub->nr_queues_ready = 0;
+ init_completion(&ub->completion);
+ ret = 0;
+ out_unlock:
+ mutex_unlock(&ub->mutex);
+ ublk_put_device(ub);
+ return ret;
+}
+
+static int ublk_ctrl_end_recovery(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ int ublksrv_pid = (int)header->data[0];
+ struct ublk_device *ub;
+ int ret = -EINVAL;
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return ret;
+
+ pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
+ __func__, ub->dev_info.nr_hw_queues, header->dev_id);
+ /* wait until new ubq_daemon sending all FETCH_REQ */
+ wait_for_completion_interruptible(&ub->completion);
+ pr_devel("%s: All new ubq_daemons(nr: %d) are ready, dev id %d\n",
+ __func__, ub->dev_info.nr_hw_queues, header->dev_id);
+
+ mutex_lock(&ub->mutex);
+ if (!ublk_can_use_recovery(ub))
+ goto out_unlock;
+
+ if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ ub->dev_info.ublksrv_pid = ublksrv_pid;
+ pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
+ __func__, ublksrv_pid, header->dev_id);
+ blk_mq_unquiesce_queue(ub->ub_disk->queue);
+ pr_devel("%s: queue unquiesced, dev id %d.\n",
+ __func__, header->dev_id);
+ blk_mq_kick_requeue_list(ub->ub_disk->queue);
+ ub->dev_info.state = UBLK_S_DEV_LIVE;
+ schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
+ ret = 0;
+ out_unlock:
+ mutex_unlock(&ub->mutex);
+ ublk_put_device(ub);
+ return ret;
+}
+
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
@@ -1750,6 +2012,12 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_CMD_SET_PARAMS:
ret = ublk_ctrl_set_params(cmd);
break;
+ case UBLK_CMD_START_USER_RECOVERY:
+ ret = ublk_ctrl_start_recovery(cmd);
+ break;
+ case UBLK_CMD_END_USER_RECOVERY:
+ ret = ublk_ctrl_end_recovery(cmd);
+ break;
default:
break;
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index dd9a05174726..3f4739d52268 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -801,7 +801,7 @@ static const struct attribute_group *virtblk_attr_groups[] = {
NULL,
};
-static int virtblk_map_queues(struct blk_mq_tag_set *set)
+static void virtblk_map_queues(struct blk_mq_tag_set *set)
{
struct virtio_blk *vblk = set->driver_data;
int i, qoff;
@@ -826,8 +826,6 @@ static int virtblk_map_queues(struct blk_mq_tag_set *set)
else
blk_mq_virtio_map_queues(&set->map[i], vblk->vdev, 0);
}
-
- return 0;
}
static void virtblk_complete_batch(struct io_comp_batch *iob)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 226ea76cc819..e551433cd107 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -499,7 +499,7 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
}
- strlcpy(file_name, buf, PATH_MAX);
+ strscpy(file_name, buf, PATH_MAX);
/* ignore trailing newline */
sz = strlen(file_name);
if (sz > 0 && file_name[sz - 1] == '\n')
@@ -1031,7 +1031,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
char compressor[ARRAY_SIZE(zram->compressor)];
size_t sz;
- strlcpy(compressor, buf, sizeof(compressor));
+ strscpy(compressor, buf, sizeof(compressor));
/* ignore trailing newline */
sz = strlen(compressor);
if (sz > 0 && compressor[sz - 1] == '\n')
@@ -1974,7 +1974,7 @@ static int zram_add(void)
if (ret)
goto out_cleanup_disk;
- strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
+ strscpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram_debugfs_register(zram);
pr_info("Added device: %s\n", zram->disk->disk_name);