author     Linus Torvalds <torvalds@linux-foundation.org>   2023-02-20 14:27:21 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2023-02-20 14:27:21 -0800
commit     5b0ed5964928b0aaf0d644c17c886c7f5ea4bb3f (patch)
tree       02df7848b8c28552039bf463e0034f5d5518b2a9 /drivers
parent     553637f73c314c742243b8dc5ef072e9dadbe581 (diff)
parent     0aa2988e4fd23c0c8b33999d7b47dfbc5e6bf24b (diff)
Merge tag 'for-6.3/block-2023-02-16' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe updates via Christoph:
- Small improvements to the logging functionality (Amit Engel)
- Authentication cleanups (Hannes Reinecke)
- Cleanup and optimize the DMA mapping code in the PCIe driver
(Keith Busch)
- Work around the command effects for Format NVM (Keith Busch)
- Misc cleanups (Keith Busch, Christoph Hellwig)
- Fix and cleanup freeing single sgl (Keith Busch)
- MD updates via Song:
- Fix a rare crash during the takeover process
- Don't update recovery_cp when curr_resync is ACTIVE
- Free writes_pending in md_stop
- Change active_io to percpu
- Updates to drbd, inching us closer to unifying the out-of-tree driver
with the in-tree one (Andreas, Christoph, Lars, Robert)
- BFQ update adding support for multi-actuator drives (Paolo, Federico,
Davide)
- Make brd compliant with REQ_NOWAIT (me)
- Fix for IOPOLL and queue entering, fixing stalled IO waiting on
timeouts (me)
- Fix for REQ_NOWAIT with multiple bios (me)
- Fix memory leak in blktrace cleanup (Greg)
- Clean up sbitmap and fix a potential hang (Kemeng)
- Clean up some bits in BFQ, and fix a bug in the request injection
(Kemeng)
- Clean up the request allocation and issue code, and fix some bugs
related to that (Kemeng)
- ublk updates and fixes:
- Add support for unprivileged ublk (Ming)
- Improve device deletion handling (Ming)
- Misc (Liu, Ziyang)
- s390 dasd fixes (Alexander, Qiheng)
- Improve utility of request caching and fixes (Anuj, Xiao)
- zoned cleanups (Pankaj)
- More constification for kobjs (Thomas)
- blk-iocost cleanups (Yu)
- Remove bio splitting from drivers that don't need it (Christoph)
- Switch blk-cgroups to use struct gendisk. Some of this is now
incomplete as select late reverts were done. (Christoph)
- Add bvec initialization helpers, and convert callers to use that
  rather than open-coding it (Christoph); see the sketch after this list
- Misc fixes and cleanups (Jinke, Keith, Arnd, Bart, Li, Martin,
Matthew, Ulf, Zhong)
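
The bvec initialization helpers mentioned above are bvec_set_page() and
bvec_set_virt(); the zram, rbd, virtio_blk and nvme hunks below convert to
them. A minimal sketch of what the conversion looks like for a caller - the
demo_* wrapper names are made up for illustration, only the helpers
themselves come from this series:

#include <linux/bvec.h>
#include <linux/mm.h>

/* Open-coded bio_vec setup, as callers did before this series */
static void demo_fill_bvec_old(struct bio_vec *bv, struct page *page)
{
	bv->bv_page   = page;
	bv->bv_len    = PAGE_SIZE;
	bv->bv_offset = 0;
}

/* The same thing with the new helper */
static void demo_fill_bvec_new(struct bio_vec *bv, struct page *page)
{
	bvec_set_page(bv, page, PAGE_SIZE, 0);
}

bvec_set_virt(bv, vaddr, len) covers the virt_to_page()/offset_in_page()
variant used for special payloads in the virtio_blk and nvme hunks.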
* tag 'for-6.3/block-2023-02-16' of git://git.kernel.dk/linux: (169 commits)
brd: use radix_tree_maybe_preload instead of radix_tree_preload
block: use proper return value from bio_failfast()
block: bio-integrity: Copy flags when bio_integrity_payload is cloned
block: Fix io statistics for cgroup in throttle path
brd: mark as nowait compatible
brd: check for REQ_NOWAIT and set correct page allocation mask
brd: return 0/-error from brd_insert_page()
block: sync mixed merged request's failfast with 1st bio's
Revert "blk-cgroup: pin the gendisk in struct blkcg_gq"
Revert "blk-cgroup: pass a gendisk to blkg_lookup"
Revert "blk-cgroup: delay blk-cgroup initialization until add_disk"
Revert "blk-cgroup: delay calling blkcg_exit_disk until disk_release"
Revert "blk-cgroup: move the cgroup information to struct gendisk"
nvme-pci: remove iod use_sgls
nvme-pci: fix freeing single sgl
block: ublk: check IO buffer based on flag need_get_data
s390/dasd: Fix potential memleak in dasd_eckd_init()
s390/dasd: sort out physical vs virtual pointers usage
block: Remove the ALLOC_CACHE_SLACK constant
block: make kobj_type structures constant
...
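
One of the MD items above, "Change active_io to percpu", converts
mddev->active_io from an atomic_t into a percpu_ref, visible in the
drivers/md/md.c hunks below. A rough sketch of the percpu_ref lifecycle that
conversion relies on; the demo_* names are placeholders for the
corresponding md paths (md_run / md_handle_request / mddev_suspend /
mddev_resume):

#include <linux/percpu-refcount.h>

/* Runs once all in-flight references are gone after percpu_ref_kill();
 * md wakes the suspend waiter from here. */
static void demo_active_io_release(struct percpu_ref *ref)
{
}

static int demo_init(struct percpu_ref *active_io)
{
	/* PERCPU_REF_ALLOW_REINIT so a later resume can resurrect the ref */
	return percpu_ref_init(active_io, demo_active_io_release,
			       PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
}

static bool demo_io_enter(struct percpu_ref *active_io)
{
	/* I/O path: cheap per-CPU get that fails once the ref is killed */
	return percpu_ref_tryget_live(active_io);
}

static void demo_io_exit(struct percpu_ref *active_io)
{
	percpu_ref_put(active_io);	/* release callback fires on the last put */
}

static void demo_suspend(struct percpu_ref *active_io)
{
	percpu_ref_kill(active_io);	/* then wait for percpu_ref_is_zero() */
}

static void demo_resume(struct percpu_ref *active_io)
{
	percpu_ref_resurrect(active_io);
}

Compared with the old atomic_inc()/atomic_dec_and_test() pair, the hot path
now only touches a per-CPU counter; suspend switches the ref to atomic mode
and waits for it to drain.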
Diffstat (limited to 'drivers')
41 files changed, 709 insertions, 509 deletions
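
The first file below is drivers/block/brd.c, the "Make brd compliant with
REQ_NOWAIT" item from the summary. The pattern, sketched with a hypothetical
demo_submit_bio() and most of the real work elided: pick GFP_NOWAIT instead
of GFP_NOIO when the bio carries REQ_NOWAIT, and report an allocation
failure on that path as "would block" rather than a hard I/O error.

#include <linux/blkdev.h>
#include <linux/bio.h>

static void demo_submit_bio(struct bio *bio)
{
	/* NOIO avoids recursing into reclaim; NOWAIT must not block at all */
	gfp_t gfp = bio->bi_opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
	struct page *page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);

	if (!page) {
		if (bio->bi_opf & REQ_NOWAIT)
			bio_wouldblock_error(bio);	/* submitter may retry */
		else
			bio_io_error(bio);
		return;
	}

	/* ... copy to/from the page here ... */

	__free_page(page);
	bio_endio(bio);
}

The queue also has to advertise support, which the brd_alloc() hunk does
with blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue).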
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 20acc4a1fd6d..a8a77a1efe1e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -78,32 +78,25 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) } /* - * Look up and return a brd's page for a given sector. - * If one does not exist, allocate an empty page, and insert that. Then - * return it. + * Insert a new page for a given sector, if one does not already exist. */ -static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) +static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) { pgoff_t idx; struct page *page; - gfp_t gfp_flags; + int ret = 0; page = brd_lookup_page(brd, sector); if (page) - return page; + return 0; - /* - * Must use NOIO because we don't want to recurse back into the - * block or filesystem layers from page reclaim. - */ - gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM; - page = alloc_page(gfp_flags); + page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); if (!page) - return NULL; + return -ENOMEM; - if (radix_tree_preload(GFP_NOIO)) { + if (radix_tree_maybe_preload(gfp)) { __free_page(page); - return NULL; + return -ENOMEM; } spin_lock(&brd->brd_lock); @@ -112,16 +105,17 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) if (radix_tree_insert(&brd->brd_pages, idx, page)) { __free_page(page); page = radix_tree_lookup(&brd->brd_pages, idx); - BUG_ON(!page); - BUG_ON(page->index != idx); + if (!page) + ret = -ENOMEM; + else if (page->index != idx) + ret = -EIO; } else { brd->brd_nr_pages++; } spin_unlock(&brd->brd_lock); radix_tree_preload_end(); - - return page; + return ret; } /* @@ -170,20 +164,22 @@ static void brd_free_pages(struct brd_device *brd) /* * copy_to_brd_setup must be called before copy_to_brd. It may sleep. */ -static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) +static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n, + gfp_t gfp) { unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; size_t copy; + int ret; copy = min_t(size_t, n, PAGE_SIZE - offset); - if (!brd_insert_page(brd, sector)) - return -ENOSPC; + ret = brd_insert_page(brd, sector, gfp); + if (ret) + return ret; if (copy < n) { sector += copy >> SECTOR_SHIFT; - if (!brd_insert_page(brd, sector)) - return -ENOSPC; + ret = brd_insert_page(brd, sector, gfp); } - return 0; + return ret; } /* @@ -256,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, enum req_op op, + unsigned int len, unsigned int off, blk_opf_t opf, sector_t sector) { void *mem; int err = 0; - if (op_is_write(op)) { - err = copy_to_brd_setup(brd, sector, len); + if (op_is_write(opf)) { + /* + * Must use NOIO because we don't want to recurse back into the + * block or filesystem layers from page reclaim. + */ + gfp_t gfp = opf & REQ_NOWAIT ? 
GFP_NOWAIT : GFP_NOIO; + + err = copy_to_brd_setup(brd, sector, len, gfp); if (err) goto out; } mem = kmap_atomic(page); - if (!op_is_write(op)) { + if (!op_is_write(opf)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -298,8 +300,12 @@ static void brd_submit_bio(struct bio *bio) (len & (SECTOR_SIZE - 1))); err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - bio_op(bio), sector); + bio->bi_opf, sector); if (err) { + if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return; + } bio_io_error(bio); return; } @@ -412,6 +418,7 @@ static int brd_alloc(int i) /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); if (err) goto out_cleanup_disk; diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index c93e462130ff..67a8b352a1d5 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -drbd-y := drbd_bitmap.o drbd_proc.o +drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o drbd-y += drbd_interval.o drbd_state.o diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c new file mode 100644 index 000000000000..cb1aa66d7d5d --- /dev/null +++ b/drivers/block/drbd/drbd_buildtag.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/drbd_config.h> +#include <linux/module.h> + +const char *drbd_buildtag(void) +{ + /* DRBD built from external sources has here a reference to the + * git hash of the source code. 
+ */ + + static char buildtag[38] = "\0uilt-in"; + + if (buildtag[0] == 0) { +#ifdef MODULE + sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); +#else + buildtag[0] = 'b'; +#endif + } + + return buildtag; +} diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index a72c096aa5b1..12460b584bcb 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -844,7 +844,7 @@ static int drbd_version_show(struct seq_file *m, void *ignored) { seq_printf(m, "# %s\n", drbd_buildtag()); seq_printf(m, "VERSION=%s\n", REL_VERSION); - seq_printf(m, "API_VERSION=%u\n", API_VERSION); + seq_printf(m, "API_VERSION=%u\n", GENL_MAGIC_VERSION); seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN); seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX); return 0; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ae713338aa46..d89b7d03d4c8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -34,21 +34,12 @@ #include <linux/prefetch.h> #include <linux/drbd_genl_api.h> #include <linux/drbd.h> +#include <linux/drbd_config.h> #include "drbd_strings.h" #include "drbd_state.h" #include "drbd_protocol.h" #include "drbd_polymorph_printk.h" -#ifdef __CHECKER__ -# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) -# define __protected_read_by(x) __attribute__((require_context(x,1,999,"read"))) -# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write"))) -#else -# define __protected_by(x) -# define __protected_read_by(x) -# define __protected_write_by(x) -#endif - /* shared module parameters, defined in drbd_main.c */ #ifdef CONFIG_DRBD_FAULT_INJECTION extern int drbd_enable_faults; @@ -774,7 +765,7 @@ struct drbd_device { unsigned long flags; /* configured by drbdsetup */ - struct drbd_backing_dev *ldev __protected_by(local); + struct drbd_backing_dev *ldev; sector_t p_size; /* partner's disk size */ struct request_queue *rq_queue; diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 5024ffd6143d..873beda6de24 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -58,7 +58,7 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) * drbd_contains_interval - check if a tree contains a given interval * @root: red black tree root * @sector: start sector of @interval - * @interval: may not be a valid pointer + * @interval: may be an invalid pointer * * Returns if the tree contains the node @interval with start sector @start. * Does not dereference @interval until @interval is known to be a valid object @@ -95,6 +95,10 @@ drbd_contains_interval(struct rb_root *root, sector_t sector, void drbd_remove_interval(struct rb_root *root, struct drbd_interval *this) { + /* avoid endless loop */ + if (drbd_interval_empty(this)) + return; + rb_erase_augmented(&this->rb, root, &augment_callbacks); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e43dfb9eb6ad..2c764f7ee4a7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2899,7 +2899,7 @@ static int __init drbd_init(void) pr_info("initialized. " "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", - API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); + GENL_MAGIC_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); pr_info("%s\n", drbd_buildtag()); pr_info("registered as block device major %d\n", DRBD_MAJOR); return 0; /* Success! 
*/ @@ -3776,24 +3776,6 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type) } #endif -const char *drbd_buildtag(void) -{ - /* DRBD built from external sources has here a reference to the - git hash of the source code. */ - - static char buildtag[38] = "\0uilt-in"; - - if (buildtag[0] == 0) { -#ifdef MODULE - sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); -#else - buildtag[0] = 'b'; -#endif - } - - return buildtag; -} - module_init(drbd_init) module_exit(drbd_cleanup) diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 2227fb0db1ce..1d0feafceadc 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -228,7 +228,7 @@ int drbd_seq_show(struct seq_file *seq, void *v) }; seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n", - API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag()); + GENL_MAGIC_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag()); /* cs .. connection state diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h index 1ee81e3c2152..941c511cc4da 100644 --- a/drivers/block/drbd/drbd_vli.h +++ b/drivers/block/drbd/drbd_vli.h @@ -327,7 +327,7 @@ static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits) */ static inline int vli_encode_bits(struct bitstream *bs, u64 in) { - u64 code = code; + u64 code; int bits = __vli_encode_bits(&code, in); if (bits <= 0) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1518a6423279..5f04235e4ff7 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -90,7 +90,7 @@ struct loop_cmd { }; #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) -#define LOOP_DEFAULT_HW_Q_DEPTH (128) +#define LOOP_DEFAULT_HW_Q_DEPTH 128 static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); @@ -1792,9 +1792,15 @@ static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH; static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p) { - int ret = kstrtoint(s, 10, &hw_queue_depth); + int qd, ret; - return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0; + ret = kstrtoint(s, 0, &qd); + if (ret < 0) + return ret; + if (qd < 1) + return -EINVAL; + hw_queue_depth = qd; + return 0; } static const struct kernel_param_ops loop_hw_qdepth_param_ops = { @@ -1803,7 +1809,7 @@ static const struct kernel_param_ops loop_hw_qdepth_param_ops = { }; device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444); -MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128"); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. 
Default: " __stringify(LOOP_DEFAULT_HW_Q_DEPTH)); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 7d28e3aa406c..4c601ca9552a 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2123,8 +2123,7 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_physical_block_size(nullb->q, dev->blocksize); if (!dev->max_sectors) dev->max_sectors = queue_max_hw_sectors(nullb->q); - dev->max_sectors = min_t(unsigned int, dev->max_sectors, - BLK_DEF_MAX_SECTORS); + dev->max_sectors = min(dev->max_sectors, BLK_DEF_MAX_SECTORS); blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); if (dev->virt_boundary) diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 574e470b220b..38d42af01b25 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -586,10 +586,6 @@ static void ps3vram_submit_bio(struct bio *bio) dev_dbg(&dev->core, "%s\n", __func__); - bio = bio_split_to_limits(bio); - if (!bio) - return; - spin_lock_irq(&priv->lock); busy = !bio_list_empty(&priv->list); bio_list_add(&priv->list, bio); @@ -749,9 +745,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) gendisk->private_data = dev; strscpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name)); set_capacity(gendisk, priv->size >> 9); - blk_queue_max_segments(gendisk->queue, BLK_MAX_SEGMENTS); - blk_queue_max_segment_size(gendisk->queue, BLK_MAX_SEGMENT_SIZE); - blk_queue_max_hw_sectors(gendisk->queue, BLK_SAFE_MAX_SECTORS); dev_info(&dev->core, "%s: Using %llu MiB of GPU memory\n", gendisk->disk_name, get_capacity(gendisk) >> 11); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 04453f4a319c..1faca7e07a4d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3068,13 +3068,12 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) for (i = 0; i < obj_req->copyup_bvec_count; i++) { unsigned int len = min(obj_overlap, (u64)PAGE_SIZE); + struct page *page = alloc_page(GFP_NOIO); - obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO); - if (!obj_req->copyup_bvecs[i].bv_page) + if (!page) return -ENOMEM; - obj_req->copyup_bvecs[i].bv_offset = 0; - obj_req->copyup_bvecs[i].bv_len = len; + bvec_set_page(&obj_req->copyup_bvecs[i], page, len, 0); obj_overlap -= len; } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6368b56eacf1..b9c759cef00e 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -42,6 +42,7 @@ #include <linux/mm.h> #include <asm/page.h> #include <linux/task_work.h> +#include <linux/namei.h> #include <uapi/linux/ublk_cmd.h> #define UBLK_MINORS (1U << MINORBITS) @@ -51,10 +52,12 @@ | UBLK_F_URING_CMD_COMP_IN_TASK \ | UBLK_F_NEED_GET_DATA \ | UBLK_F_USER_RECOVERY \ - | UBLK_F_USER_RECOVERY_REISSUE) + | UBLK_F_USER_RECOVERY_REISSUE \ + | UBLK_F_UNPRIVILEGED_DEV) /* All UBLK_PARAM_TYPE_* should be included here */ -#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD) +#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \ + UBLK_PARAM_TYPE_DISCARD | UBLK_PARAM_TYPE_DEVT) struct ublk_rq_data { struct llist_node node; @@ -147,6 +150,7 @@ struct ublk_device { #define UB_STATE_OPEN 0 #define UB_STATE_USED 1 +#define UB_STATE_DELETED 2 unsigned long state; int ub_number; @@ -159,7 +163,7 @@ struct ublk_device { struct completion completion; unsigned int nr_queues_ready; - atomic_t nr_aborted_queues; + unsigned int nr_privileged_daemon; /* * Our ubq->daemon may 
be killed without any notification, so @@ -185,6 +189,15 @@ static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */ static DEFINE_MUTEX(ublk_ctl_mutex); +/* + * Max ublk devices allowed to add + * + * It can be extended to one per-user limit in future or even controlled + * by cgroup. + */ +static unsigned int ublks_max = 64; +static unsigned int ublks_added; /* protected by ublk_ctl_mutex */ + static struct miscdevice ublk_misc; static void ublk_dev_param_basic_apply(struct ublk_device *ub) @@ -255,6 +268,10 @@ static int ublk_validate_params(const struct ublk_device *ub) return -EINVAL; } + /* dev_t is read-only */ + if (ub->params.types & UBLK_PARAM_TYPE_DEVT) + return -EINVAL; + return 0; } @@ -306,7 +323,7 @@ static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev, static inline bool ublk_rq_has_data(const struct request *rq) { - return rq->bio && bio_has_data(rq->bio); + return bio_has_data(rq->bio); } static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, @@ -361,8 +378,50 @@ static void ublk_free_disk(struct gendisk *disk) put_device(&ub->cdev_dev); } +static void ublk_store_owner_uid_gid(unsigned int *owner_uid, + unsigned int *owner_gid) +{ + kuid_t uid; + kgid_t gid; + + current_uid_gid(&uid, &gid); + + *owner_uid = from_kuid(&init_user_ns, uid); + *owner_gid = from_kgid(&init_user_ns, gid); +} + +static int ublk_open(struct block_device *bdev, fmode_t mode) +{ + struct ublk_device *ub = bdev->bd_disk->private_data; + + if (capable(CAP_SYS_ADMIN)) + return 0; + + /* + * If it is one unprivileged device, only owner can open + * the disk. Otherwise it could be one trap made by one + * evil user who grants this disk's privileges to other + * users deliberately. + * + * This way is reasonable too given anyone can create + * unprivileged device, and no need other's grant. + */ + if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV) { + unsigned int curr_uid, curr_gid; + + ublk_store_owner_uid_gid(&curr_uid, &curr_gid); + + if (curr_uid != ub->dev_info.owner_uid || curr_gid != + ub->dev_info.owner_gid) + return -EPERM; + } + + return 0; +} + static const struct block_device_operations ub_fops = { .owner = THIS_MODULE, + .open = ublk_open, .free_disk = ublk_free_disk, }; @@ -607,7 +666,7 @@ static void ublk_complete_rq(struct request *req) } /* - * FLUSH or DISCARD usually won't return bytes returned, so end them + * FLUSH, DISCARD or WRITE_ZEROES usually won't return bytes returned, so end them * directly. * * Both the two needn't unmap. 
@@ -1179,6 +1238,9 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) ubq->ubq_daemon = current; get_task_struct(ubq->ubq_daemon); ub->nr_queues_ready++; + + if (capable(CAP_SYS_ADMIN)) + ub->nr_privileged_daemon++; } if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) complete_all(&ub->completion); @@ -1203,6 +1265,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) u32 cmd_op = cmd->cmd_op; unsigned tag = ub_cmd->tag; int ret = -EINVAL; + struct request *req; pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", __func__, cmd->cmd_op, ub_cmd->q_id, tag, @@ -1253,8 +1316,8 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) */ if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) goto out; - /* FETCH_RQ has to provide IO buffer */ - if (!ub_cmd->addr) + /* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */ + if (!ub_cmd->addr && !ublk_need_get_data(ubq)) goto out; io->cmd = cmd; io->flags |= UBLK_IO_FLAG_ACTIVE; @@ -1263,8 +1326,12 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) ublk_mark_io_ready(ub, ubq); break; case UBLK_IO_COMMIT_AND_FETCH_REQ: - /* FETCH_RQ has to provide IO buffer */ - if (!ub_cmd->addr) + req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); + /* + * COMMIT_AND_FETCH_REQ has to provide IO buffer if NEED GET DATA is + * not enabled or it is Read IO. + */ + if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ)) goto out; if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) goto out; @@ -1433,6 +1500,8 @@ static int ublk_add_chdev(struct ublk_device *ub) ret = cdev_device_add(&ub->cdev, dev); if (ret) goto fail; + + ublks_added++; return 0; fail: put_device(dev); @@ -1475,6 +1544,7 @@ static void ublk_remove(struct ublk_device *ub) cancel_work_sync(&ub->quiesce_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); put_device(&ub->cdev_dev); + ublks_added--; } static struct ublk_device *ublk_get_device_from_id(int idx) @@ -1493,21 +1563,16 @@ static struct ublk_device *ublk_get_device_from_id(int idx) return ub; } -static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) +static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; int ublksrv_pid = (int)header->data[0]; - struct ublk_device *ub; struct gendisk *disk; int ret = -EINVAL; if (ublksrv_pid <= 0) return -EINVAL; - ub = ublk_get_device_from_id(header->dev_id); - if (!ub) - return -EINVAL; - wait_for_completion_interruptible(&ub->completion); schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD); @@ -1519,7 +1584,7 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) goto out_unlock; } - disk = blk_mq_alloc_disk(&ub->tag_set, ub); + disk = blk_mq_alloc_disk(&ub->tag_set, NULL); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_unlock; @@ -1535,6 +1600,10 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) if (ret) goto out_put_disk; + /* don't probe partitions if any one ubq daemon is un-trusted */ + if (ub->nr_privileged_daemon != ub->nr_queues_ready) + set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); + get_device(&ub->cdev_dev); ret = add_disk(disk); if (ret) { @@ -1552,21 +1621,20 @@ out_put_disk: put_disk(disk); out_unlock: mutex_unlock(&ub->mutex); - ublk_put_device(ub); return ret; } -static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) +static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub, + 
struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; void __user *argp = (void __user *)(unsigned long)header->addr; - struct ublk_device *ub; cpumask_var_t cpumask; unsigned long queue; unsigned int retlen; unsigned int i; - int ret = -EINVAL; - + int ret; + if (header->len * BITS_PER_BYTE < nr_cpu_ids) return -EINVAL; if (header->len & (sizeof(unsigned long)-1)) @@ -1574,17 +1642,12 @@ static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) if (!header->addr) return -EINVAL; - ub = ublk_get_device_from_id(header->dev_id); - if (!ub) - return -EINVAL; - queue = header->data[0]; if (queue >= ub->dev_info.nr_hw_queues) - goto out_put_device; + return -EINVAL; - ret = -ENOMEM; if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) - goto out_put_device; + return -ENOMEM; for_each_possible_cpu(i) { if (ub->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[i] == queue) @@ -1602,8 +1665,6 @@ static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) ret = 0; out_free_cpumask: free_cpumask_var(cpumask); -out_put_device: - ublk_put_device(ub); return ret; } @@ -1630,19 +1691,34 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) __func__, header->queue_id); return -EINVAL; } + if (copy_from_user(&info, argp, sizeof(info))) return -EFAULT; - ublk_dump_dev_info(&info); + + if (capable(CAP_SYS_ADMIN)) + info.flags &= ~UBLK_F_UNPRIVILEGED_DEV; + else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV)) + return -EPERM; + + /* the created device is always owned by current user */ + ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid); + if (header->dev_id != info.dev_id) { pr_warn("%s: dev id not match %u %u\n", __func__, header->dev_id, info.dev_id); return -EINVAL; } + ublk_dump_dev_info(&info); + ret = mutex_lock_killable(&ublk_ctl_mutex); if (ret) return ret; + ret = -EACCES; + if (ublks_added >= ublks_max) + goto out_unlock; + ret = -ENOMEM; ub = kzalloc(sizeof(*ub), GFP_KERNEL); if (!ub) @@ -1724,33 +1800,43 @@ static inline bool ublk_idr_freed(int id) return ptr == NULL; } -static int ublk_ctrl_del_dev(int idx) +static int ublk_ctrl_del_dev(struct ublk_device **p_ub) { - struct ublk_device *ub; + struct ublk_device *ub = *p_ub; + int idx = ub->ub_number; int ret; ret = mutex_lock_killable(&ublk_ctl_mutex); if (ret) return ret; - ub = ublk_get_device_from_id(idx); - if (ub) { + if (!test_bit(UB_STATE_DELETED, &ub->state)) { ublk_remove(ub); - ublk_put_device(ub); - ret = 0; - } else { - ret = -ENODEV; + set_bit(UB_STATE_DELETED, &ub->state); } + /* Mark the reference as consumed */ + *p_ub = NULL; + ublk_put_device(ub); + mutex_unlock(&ublk_ctl_mutex); + /* * Wait until the idr is removed, then it can be reused after * DEL_DEV command is returned. 
+ * + * If we returns because of user interrupt, future delete command + * may come: + * + * - the device number isn't freed, this device won't or needn't + * be deleted again, since UB_STATE_DELETED is set, and device + * will be released after the last reference is dropped + * + * - the device number is freed already, we will not find this + * device via ublk_get_device_from_id() */ - if (!ret) - wait_event(ublk_idr_wq, ublk_idr_freed(idx)); - mutex_unlock(&ublk_ctl_mutex); + wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)); - return ret; + return 0; } static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) @@ -1762,50 +1848,52 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) header->data[0], header->addr, header->len); } -static int ublk_ctrl_stop_dev(struct io_uring_cmd *cmd) +static int ublk_ctrl_stop_dev(struct ublk_device *ub) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; - struct ublk_device *ub; - - ub = ublk_get_device_from_id(header->dev_id); - if (!ub) - return -EINVAL; - ublk_stop_dev(ub); cancel_work_sync(&ub->stop_work); cancel_work_sync(&ub->quiesce_work); - ublk_put_device(ub); return 0; } -static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd) +static int ublk_ctrl_get_dev_info(struct ublk_device *ub, + struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; void __user *argp = (void __user *)(unsigned long)header->addr; - struct ublk_device *ub; - int ret = 0; if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr) return -EINVAL; - ub = ublk_get_device_from_id(header->dev_id); - if (!ub) - return -EINVAL; - if (copy_to_user(argp, &ub->dev_info, sizeof(ub->dev_info))) - ret = -EFAULT; - ublk_put_device(ub); + return -EFAULT; - return ret; + return 0; +} + +/* TYPE_DEVT is readonly, so fill it up before returning to userspace */ +static void ublk_ctrl_fill_params_devt(struct ublk_device *ub) +{ + ub->params.devt.char_major = MAJOR(ub->cdev_dev.devt); + ub->params.devt.char_minor = MINOR(ub->cdev_dev.devt); + + if (ub->ub_disk) { + ub->params.devt.disk_major = MAJOR(disk_devt(ub->ub_disk)); + ub->params.devt.disk_minor = MINOR(disk_devt(ub->ub_disk)); + } else { + ub->params.devt.disk_major = 0; + ub->params.devt.disk_minor = 0; + } + ub->params.types |= UBLK_PARAM_TYPE_DEVT; } -static int ublk_ctrl_get_params(struct io_uring_cmd *cmd) +static int ublk_ctrl_get_params(struct ublk_device *ub, + struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; - struct ublk_device *ub; int ret; if (header->len <= sizeof(ph) || !header->addr) @@ -1820,27 +1908,23 @@ static int ublk_ctrl_get_params(struct io_uring_cmd *cmd) if (ph.len > sizeof(struct ublk_params)) ph.len = sizeof(struct ublk_params); - ub = ublk_get_device_from_id(header->dev_id); - if (!ub) - return -EINVAL; - mutex_lock(&ub->mutex); + ublk_ctrl_fill_params_devt(ub); if (copy_to_user(argp, &ub->params, ph.len)) ret = -EFAULT; else ret = 0; mutex_unlock(&ub->mutex); - ublk_put_device(ub); return ret; } -static int ublk_ctrl_set_params(struct io_uring_cmd *cmd) +static int ublk_ctrl_set_params(struct ublk_device *ub, + struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; - struct ublk_device *ub; int ret = -EFAULT; if 
(header->len <= sizeof(ph) || !header->addr) @@ -1855,10 +1939,6 @@ static int ublk_ctrl_set_params(struct io_uring_cmd *cmd) if (ph.len > sizeof(struct ublk_params)) ph.len = sizeof(struct ublk_params); - ub = ublk_get_device_from_id(header->dev_id); - if (!ub) - return -EINVAL; - /* parameters can only be changed when device isn't live */ mutex_lock(&ub->mutex); if (ub->dev_info.state == UBLK_S_DEV_LIVE) { @@ -1871,7 +1951,6 @@ static int ublk_ctrl_set_params(struct io_uring_cmd *cmd) ret = ublk_validate_params(ub); } mutex_unlock(&ub->mutex); - ublk_put_device(ub); return ret; } @@ -1898,17 +1977,13 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) } } -static int ublk_ctrl_start_recovery(struct io_uring_cmd *cmd) +static int ublk_ctrl_start_recovery(struct ublk_device *ub, + struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; - struct ublk_device *ub; int ret = -EINVAL; int i; - ub = ublk_get_device_from_id(header->dev_id); - if (!ub) - return ret; - mutex_lock(&ub->mutex); if (!ublk_can_use_recovery(ub)) goto out_unlock; @@ -1936,25 +2011,21 @@ static int ublk_ctrl_start_recovery(struct io_uring_cmd *cmd) /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */ ub->mm = NULL; ub->nr_queues_ready = 0; + ub->nr_privileged_daemon = 0; init_completion(&ub->completion); ret = 0; out_unlock: mutex_unlock(&ub->mutex); - ublk_put_device(ub); return ret; } -static int ublk_ctrl_end_recovery(struct io_uring_cmd *cmd) +static int ublk_ctrl_end_recovery(struct ublk_device *ub, + struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; int ublksrv_pid = (int)header->data[0]; - struct ublk_device *ub; int ret = -EINVAL; - ub = ublk_get_device_from_id(header->dev_id); - if (!ub) - return ret; - pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n", __func__, ub->dev_info.nr_hw_queues, header->dev_id); /* wait until new ubq_daemon sending all FETCH_REQ */ @@ -1982,7 +2053,115 @@ static int ublk_ctrl_end_recovery(struct io_uring_cmd *cmd) ret = 0; out_unlock: mutex_unlock(&ub->mutex); - ublk_put_device(ub); + return ret; +} + +/* + * All control commands are sent via /dev/ublk-control, so we have to check + * the destination device's permission + */ +static int ublk_char_dev_permission(struct ublk_device *ub, + const char *dev_path, int mask) +{ + int err; + struct path path; + struct kstat stat; + + err = kern_path(dev_path, LOOKUP_FOLLOW, &path); + if (err) + return err; + + err = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); + if (err) + goto exit; + + err = -EPERM; + if (stat.rdev != ub->cdev_dev.devt || !S_ISCHR(stat.mode)) + goto exit; + + err = inode_permission(&nop_mnt_idmap, + d_backing_inode(path.dentry), mask); +exit: + path_put(&path); + return err; +} + +static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, + struct io_uring_cmd *cmd) +{ + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; + void __user *argp = (void __user *)(unsigned long)header->addr; + char *dev_path = NULL; + int ret = 0; + int mask; + + if (!unprivileged) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + /* + * The new added command of UBLK_CMD_GET_DEV_INFO2 includes + * char_dev_path in payload too, since userspace may not + * know if the specified device is created as unprivileged + * mode. 
+ */ + if (cmd->cmd_op != UBLK_CMD_GET_DEV_INFO2) + return 0; + } + + /* + * User has to provide the char device path for unprivileged ublk + * + * header->addr always points to the dev path buffer, and + * header->dev_path_len records length of dev path buffer. + */ + if (!header->dev_path_len || header->dev_path_len > PATH_MAX) + return -EINVAL; + + if (header->len < header->dev_path_len) + return -EINVAL; + + dev_path = kmalloc(header->dev_path_len + 1, GFP_KERNEL); + if (!dev_path) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(dev_path, argp, header->dev_path_len)) + goto exit; + dev_path[header->dev_path_len] = 0; + + ret = -EINVAL; + switch (cmd->cmd_op) { + case UBLK_CMD_GET_DEV_INFO: + case UBLK_CMD_GET_DEV_INFO2: + case UBLK_CMD_GET_QUEUE_AFFINITY: + case UBLK_CMD_GET_PARAMS: + mask = MAY_READ; + break; + case UBLK_CMD_START_DEV: + case UBLK_CMD_STOP_DEV: + case UBLK_CMD_ADD_DEV: + case UBLK_CMD_DEL_DEV: + case UBLK_CMD_SET_PARAMS: + case UBLK_CMD_START_USER_RECOVERY: + case UBLK_CMD_END_USER_RECOVERY: + mask = MAY_READ | MAY_WRITE; + break; + default: + goto exit; + } + + ret = ublk_char_dev_permission(ub, dev_path, mask); + if (!ret) { + header->len -= header->dev_path_len; + header->addr += header->dev_path_len; + } + pr_devel("%s: dev id %d cmd_op %x uid %d gid %d path %s ret %d\n", + __func__, ub->ub_number, cmd->cmd_op, + ub->dev_info.owner_uid, ub->dev_info.owner_gid, + dev_path, ret); +exit: + kfree(dev_path); return ret; } @@ -1990,6 +2169,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + struct ublk_device *ub = NULL; int ret = -EINVAL; if (issue_flags & IO_URING_F_NONBLOCK) @@ -2000,45 +2180,61 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (!(issue_flags & IO_URING_F_SQE128)) goto out; - ret = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out; + if (cmd->cmd_op != UBLK_CMD_ADD_DEV) { + ret = -ENODEV; + ub = ublk_get_device_from_id(header->dev_id); + if (!ub) + goto out; + + ret = ublk_ctrl_uring_cmd_permission(ub, cmd); + } else { + /* ADD_DEV permission check is done in command handler */ + ret = 0; + } + + if (ret) + goto put_dev; - ret = -ENODEV; switch (cmd->cmd_op) { case UBLK_CMD_START_DEV: - ret = ublk_ctrl_start_dev(cmd); + ret = ublk_ctrl_start_dev(ub, cmd); break; case UBLK_CMD_STOP_DEV: - ret = ublk_ctrl_stop_dev(cmd); + ret = ublk_ctrl_stop_dev(ub); break; case UBLK_CMD_GET_DEV_INFO: - ret = ublk_ctrl_get_dev_info(cmd); + case UBLK_CMD_GET_DEV_INFO2: + ret = ublk_ctrl_get_dev_info(ub, cmd); break; case UBLK_CMD_ADD_DEV: ret = ublk_ctrl_add_dev(cmd); break; case UBLK_CMD_DEL_DEV: - ret = ublk_ctrl_del_dev(header->dev_id); + ret = ublk_ctrl_del_dev(&ub); break; case UBLK_CMD_GET_QUEUE_AFFINITY: - ret = ublk_ctrl_get_queue_affinity(cmd); + ret = ublk_ctrl_get_queue_affinity(ub, cmd); break; case UBLK_CMD_GET_PARAMS: - ret = ublk_ctrl_get_params(cmd); + ret = ublk_ctrl_get_params(ub, cmd); break; case UBLK_CMD_SET_PARAMS: - ret = ublk_ctrl_set_params(cmd); + ret = ublk_ctrl_set_params(ub, cmd); break; case UBLK_CMD_START_USER_RECOVERY: - ret = ublk_ctrl_start_recovery(cmd); + ret = ublk_ctrl_start_recovery(ub, cmd); break; case UBLK_CMD_END_USER_RECOVERY: - ret = ublk_ctrl_end_recovery(cmd); + ret = ublk_ctrl_end_recovery(ub, cmd); break; default: + ret = -ENOTSUPP; break; } + + put_dev: + if (ub) + ublk_put_device(ub); out: io_uring_cmd_done(cmd, ret, 0); pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n", @@ 
-2105,5 +2301,8 @@ static void __exit ublk_exit(void) module_init(ublk_init); module_exit(ublk_exit); +module_param(ublks_max, int, 0444); +MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)"); + MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6a77fa917428..dc6e9b989910 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -170,9 +170,7 @@ static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool un WARN_ON_ONCE(n != segments); - req->special_vec.bv_page = virt_to_page(range); - req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = sizeof(*range) * segments; + bvec_set_virt(&req->special_vec, range, sizeof(*range) * segments); req->rq_flags |= RQF_SPECIAL_PAYLOAD; return 0; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e290d6d97047..bd8ae4822dc3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -703,9 +703,7 @@ static ssize_t writeback_store(struct device *dev, for (; nr_pages != 0; index++, nr_pages--) { struct bio_vec bvec; - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; + bvec_set_page(&bvec, page, PAGE_SIZE, 0); spin_lock(&zram->wb_limit_lock); if (zram->wb_limit_enable && !zram->bd_wb_limit) { @@ -1380,12 +1378,9 @@ out: static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page, u32 index, struct bio *bio, bool partial_io) { - struct bio_vec bvec = { - .bv_page = page, - .bv_len = PAGE_SIZE, - .bv_offset = 0, - }; + struct bio_vec bvec; + bvec_set_page(&bvec, page, PAGE_SIZE, 0); return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio, partial_io); } @@ -1652,9 +1647,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, memcpy_from_bvec(dst + offset, bvec); kunmap_atomic(dst); - vec.bv_page = page; - vec.bv_len = PAGE_SIZE; - vec.bv_offset = 0; + bvec_set_page(&vec, page, PAGE_SIZE, 0); } ret = __zram_bvec_write(zram, &vec, index, bio); diff --git a/drivers/md/md.c b/drivers/md/md.c index 02b0240e7c71..927a43db5dfb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -380,6 +380,10 @@ EXPORT_SYMBOL_GPL(md_new_event); static LIST_HEAD(all_mddevs); static DEFINE_SPINLOCK(all_mddevs_lock); +static bool is_md_suspended(struct mddev *mddev) +{ + return percpu_ref_is_dying(&mddev->active_io); +} /* Rather than calling directly into the personality make_request function, * IO requests come here first so that we can check if the device is * being suspended pending a reconfiguration. 
@@ -389,7 +393,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); */ static bool is_suspended(struct mddev *mddev, struct bio *bio) { - if (mddev->suspended) + if (is_md_suspended(mddev)) return true; if (bio_data_dir(bio) != WRITE) return false; @@ -405,12 +409,10 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio) void md_handle_request(struct mddev *mddev, struct bio *bio) { check_suspended: - rcu_read_lock(); if (is_suspended(mddev, bio)) { DEFINE_WAIT(__wait); /* Bail out if REQ_NOWAIT is set for the bio */ if (bio->bi_opf & REQ_NOWAIT) { - rcu_read_unlock(); bio_wouldblock_error(bio); return; } @@ -419,23 +421,19 @@ check_suspended: TASK_UNINTERRUPTIBLE); if (!is_suspended(mddev, bio)) break; - rcu_read_unlock(); schedule(); - rcu_read_lock(); } finish_wait(&mddev->sb_wait, &__wait); } - atomic_inc(&mddev->active_io); - rcu_read_unlock(); + if (!percpu_ref_tryget_live(&mddev->active_io)) + goto check_suspended; if (!mddev->pers->make_request(mddev, bio)) { - atomic_dec(&mddev->active_io); - wake_up(&mddev->sb_wait); + percpu_ref_put(&mddev->active_io); goto check_suspended; } - if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) - wake_up(&mddev->sb_wait); + percpu_ref_put(&mddev->active_io); } EXPORT_SYMBOL(md_handle_request); @@ -483,11 +481,10 @@ void mddev_suspend(struct mddev *mddev) lockdep_assert_held(&mddev->reconfig_mutex); if (mddev->suspended++) return; - synchronize_rcu(); wake_up(&mddev->sb_wait); set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags); - smp_mb__after_atomic(); - wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); + percpu_ref_kill(&mddev->active_io); + wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io)); mddev->pers->quiesce(mddev, 1); clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags); wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); @@ -505,6 +502,7 @@ void mddev_resume(struct mddev *mddev) lockdep_assert_held(&mddev->reconfig_mutex); if (--mddev->suspended) return; + percpu_ref_resurrect(&mddev->active_io); wake_up(&mddev->sb_wait); mddev->pers->quiesce(mddev, 0); @@ -683,7 +681,6 @@ void mddev_init(struct mddev *mddev) timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0); atomic_set(&mddev->active, 1); atomic_set(&mddev->openers, 0); - atomic_set(&mddev->active_io, 0); spin_lock_init(&mddev->lock); atomic_set(&mddev->flush_pending, 0); init_waitqueue_head(&mddev->sb_wait); @@ -5760,6 +5757,12 @@ static void md_safemode_timeout(struct timer_list *t) } static int start_dirty_degraded; +static void active_io_release(struct percpu_ref *ref) +{ + struct mddev *mddev = container_of(ref, struct mddev, active_io); + + wake_up(&mddev->sb_wait); +} int md_run(struct mddev *mddev) { @@ -5840,10 +5843,15 @@ int md_run(struct mddev *mddev) nowait = nowait && bdev_nowait(rdev->bdev); } + err = percpu_ref_init(&mddev->active_io, active_io_release, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); + if (err) + return err; + if (!bioset_initialized(&mddev->bio_set)) { err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (err) - return err; + goto exit_active_io; } if (!bioset_initialized(&mddev->sync_set)) { err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); @@ -6031,6 +6039,8 @@ abort: bioset_exit(&mddev->sync_set); exit_bio_set: bioset_exit(&mddev->bio_set); +exit_active_io: + percpu_ref_exit(&mddev->active_io); return err; } EXPORT_SYMBOL_GPL(md_run); @@ -6156,7 +6166,7 @@ static void md_clean(struct mddev *mddev) mddev->new_level = LEVEL_NONE; mddev->new_layout = 
0; mddev->new_chunk_sectors = 0; - mddev->curr_resync = 0; + mddev->curr_resync = MD_RESYNC_NONE; atomic64_set(&mddev->resync_mismatches, 0); mddev->suspend_lo = mddev->suspend_hi = 0; mddev->sync_speed_min = mddev->sync_speed_max = 0; @@ -6219,7 +6229,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { md_bitmap_wait_behind_writes(mddev); - if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) { + if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); } @@ -6255,6 +6265,8 @@ void md_stop(struct mddev *mddev) */ __md_stop_writes(mddev); __md_stop(mddev); + percpu_ref_exit(&mddev->writes_pending); + percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); } @@ -7828,6 +7840,7 @@ static void md_free_disk(struct gendisk *disk) struct mddev *mddev = disk->private_data; percpu_ref_exit(&mddev->writes_pending); + percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); @@ -8531,7 +8544,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) return true; wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) || - mddev->suspended); + is_md_suspended(mddev)); if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { percpu_ref_put(&mddev->writes_pending); return false; @@ -8615,12 +8628,15 @@ static void md_end_io_acct(struct bio *bio) { struct md_io_acct *md_io_acct = bio->bi_private; struct bio *orig_bio = md_io_acct->orig_bio; + struct mddev *mddev = md_io_acct->mddev; orig_bio->bi_status = bio->bi_status; bio_end_io_acct(orig_bio, md_io_acct->start_time); bio_put(bio); bio_endio(orig_bio); + + percpu_ref_put(&mddev->active_io); } /* @@ -8636,10 +8652,13 @@ void md_account_bio(struct mddev *mddev, struct bio **bio) if (!blk_queue_io_stat(bdev->bd_disk->queue)) return; + percpu_ref_get(&mddev->active_io); + clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set); md_io_acct = container_of(clone, struct md_io_acct, bio_clone); md_io_acct->orig_bio = *bio; md_io_acct->start_time = bio_start_io_acct(*bio); + md_io_acct->mddev = mddev; clone->bi_end_io = md_end_io_acct; clone->bi_private = md_io_acct; @@ -8883,7 +8902,7 @@ void md_do_sync(struct md_thread *thread) atomic_set(&mddev->recovery_active, 0); last_check = 0; - if (j>2) { + if (j >= MD_RESYNC_ACTIVE) { pr_debug("md: resuming %s of %s from checkpoint.\n", desc, mdname(mddev)); mddev->curr_resync = j; @@ -8955,7 +8974,7 @@ void md_do_sync(struct md_thread *thread) if (j > max_sectors) /* when skipping, extra large numbers can be returned. 
*/ j = max_sectors; - if (j > 2) + if (j >= MD_RESYNC_ACTIVE) mddev->curr_resync = j; mddev->curr_mark_cnt = io_sectors; if (last_check == 0) @@ -9030,7 +9049,7 @@ void md_do_sync(struct md_thread *thread) mddev->pers->sync_request(mddev, max_sectors, &skipped); if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && - mddev->curr_resync >= MD_RESYNC_ACTIVE) { + mddev->curr_resync > MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { if (mddev->curr_resync >= mddev->recovery_cp) { @@ -9259,7 +9278,7 @@ void md_check_recovery(struct mddev *mddev) wake_up(&mddev->sb_wait); } - if (mddev->suspended) + if (is_md_suspended(mddev)) return; if (mddev->bitmap) diff --git a/drivers/md/md.h b/drivers/md/md.h index 554a9026669a..e148e3c83b0d 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -315,7 +315,7 @@ struct mddev { unsigned long sb_flags; int suspended; - atomic_t active_io; + struct percpu_ref active_io; int ro; int sysfs_active; /* set when sysfs deletes * are happening, so run/ @@ -710,9 +710,10 @@ struct md_thread { }; struct md_io_acct { - struct bio *orig_bio; - unsigned long start_time; - struct bio bio_clone; + struct mddev *mddev; + struct bio *orig_bio; + unsigned long start_time; + struct bio bio_clone; }; #define THREAD_WAKEUP 0 diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index bdb97496ba2d..901c59145811 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -160,7 +160,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; - return NVME_SC_INVALID_FIELD; + return -EINVAL; } hmac_name = nvme_auth_hmac_name(data->hashid); @@ -169,7 +169,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, "qid %d: invalid HASH ID %d\n", chap->qid, data->hashid); chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE; - return NVME_SC_INVALID_FIELD; + return -EPROTO; } if (chap->hash_id == data->hashid && chap->shash_tfm && @@ -195,7 +195,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, chap->qid, hmac_name, PTR_ERR(chap->shash_tfm)); chap->shash_tfm = NULL; chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED; - return NVME_SC_AUTH_REQUIRED; + return -ENOMEM; } if (crypto_shash_digestsize(chap->shash_tfm) != data->hl) { @@ -205,7 +205,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl, crypto_free_shash(chap->shash_tfm); chap->shash_tfm = NULL; chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE; - return NVME_SC_AUTH_REQUIRED; + return -EPROTO; } chap->hash_id = data->hashid; @@ -221,7 +221,7 @@ select_kpp: chap->qid, data->dhgid); chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE; /* Leave previous dh_tfm intact */ - return NVME_SC_AUTH_REQUIRED; + return -EPROTO; } if (chap->dhgroup_id == data->dhgid && @@ -244,7 +244,7 @@ select_kpp: "qid %d: empty DH value\n", chap->qid); chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE; - return NVME_SC_INVALID_FIELD; + return -EPROTO; } chap->dh_tfm = crypto_alloc_kpp(kpp_name, 0, 0); @@ -256,7 +256,7 @@ select_kpp: chap->qid, ret, gid_name); chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE; chap->dh_tfm = NULL; - return NVME_SC_AUTH_REQUIRED; + return -ret; } dev_dbg(ctrl->device, "qid %d: selected DH group %s\n", chap->qid, gid_name); @@ -265,7 +265,7 @@ select_kpp: "qid %d: invalid DH value for NULL DH\n", chap->qid); chap->status = 
NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; - return NVME_SC_INVALID_FIELD; + return -EPROTO; } chap->dhgroup_id = data->dhgid; @@ -276,7 +276,7 @@ skip_kpp: chap->ctrl_key = kmalloc(dhvlen, GFP_KERNEL); if (!chap->ctrl_key) { chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED; - return NVME_SC_AUTH_REQUIRED; + return -ENOMEM; } chap->ctrl_key_len = dhvlen; memcpy(chap->ctrl_key, data->cval + chap->hash_len, @@ -346,7 +346,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; - return NVME_SC_INVALID_FIELD; + return -EINVAL; } if (data->hl != chap->hash_len) { @@ -354,7 +354,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, "qid %d: invalid hash length %u\n", chap->qid, data->hl); chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE; - return NVME_SC_INVALID_FIELD; + return -EPROTO; } /* Just print out information for the admin queue */ @@ -378,7 +378,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, "qid %d: controller authentication failed\n", chap->qid); chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED; - return NVME_SC_AUTH_REQUIRED; + return -ECONNREFUSED; } /* Just print out information for the admin queue */ @@ -732,7 +732,7 @@ static void nvme_queue_auth_work(struct work_struct *work) NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE); if (ret) { chap->status = ret; - chap->error = NVME_SC_AUTH_REQUIRED; + chap->error = -ECONNREFUSED; return; } @@ -800,7 +800,7 @@ static void nvme_queue_auth_work(struct work_struct *work) NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1); if (ret) { chap->status = ret; - chap->error = NVME_SC_AUTH_REQUIRED; + chap->error = -ECONNREFUSED; return; } @@ -821,7 +821,7 @@ static void nvme_queue_auth_work(struct work_struct *work) ret = nvme_auth_process_dhchap_success1(ctrl, chap); if (ret) { /* Controller authentication failed */ - chap->error = NVME_SC_AUTH_REQUIRED; + chap->error = -ECONNREFUSED; goto fail2; } diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index e958d5015585..bc523ca02254 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -54,6 +54,14 @@ static const char * const nvme_admin_ops[] = { [nvme_admin_get_lba_status] = "Get LBA Status", }; +static const char * const nvme_fabrics_ops[] = { + [nvme_fabrics_type_property_set] = "Property Set", + [nvme_fabrics_type_property_get] = "Property Get", + [nvme_fabrics_type_connect] = "Connect", + [nvme_fabrics_type_auth_send] = "Authentication Send", + [nvme_fabrics_type_auth_receive] = "Authentication Receive", +}; + static const char * const nvme_statuses[] = { [NVME_SC_SUCCESS] = "Success", [NVME_SC_INVALID_OPCODE] = "Invalid Command Opcode", @@ -185,3 +193,11 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode) return nvme_admin_ops[opcode]; return "Unknown"; } +EXPORT_SYMBOL_GPL(nvme_get_admin_opcode_str); + +const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) { + if (opcode < ARRAY_SIZE(nvme_fabrics_ops) && nvme_fabrics_ops[opcode]) + return nvme_fabrics_ops[opcode]; + return "Unknown"; +} +EXPORT_SYMBOL_GPL(nvme_get_fabrics_opcode_str); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8b6421141162..8698410aeb84 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -806,9 +806,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, cmnd->dsm.nr = cpu_to_le32(segments - 1); cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - 
req->special_vec.bv_page = virt_to_page(range); - req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = alloc_size; + bvec_set_virt(&req->special_vec, range, alloc_size); req->rq_flags |= RQF_SPECIAL_PAYLOAD; return BLK_STS_OK; @@ -1004,7 +1002,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); * >0: nvme controller's cqe status response * <0: kernel error in lieu of controller response */ -static int nvme_execute_rq(struct request *rq, bool at_head) +int nvme_execute_rq(struct request *rq, bool at_head) { blk_status_t status; @@ -1015,6 +1013,7 @@ static int nvme_execute_rq(struct request *rq, bool at_head) return nvme_req(rq)->status; return blk_status_to_errno(status); } +EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU); /* * Returns 0 on success. If the result is negative, it's a Linux error code; @@ -1060,41 +1059,12 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); -static u32 nvme_known_admin_effects(u8 opcode) -{ - switch (opcode) { - case nvme_admin_format_nvm: - return NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_NCC | - NVME_CMD_EFFECTS_CSE_MASK; - case nvme_admin_sanitize_nvm: - return NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK; - default: - break; - } - return 0; -} - -static u32 nvme_known_nvm_effects(u8 opcode) -{ - switch (opcode) { - case nvme_cmd_write: - case nvme_cmd_write_zeroes: - case nvme_cmd_write_uncor: - return NVME_CMD_EFFECTS_LBCC; - default: - return 0; - } -} - u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) { u32 effects = 0; if (ns) { - if (ns->head->effects) - effects = le32_to_cpu(ns->head->effects->iocs[opcode]); - if (ns->head->ids.csi == NVME_CSI_NVM) - effects |= nvme_known_nvm_effects(opcode); + effects = le32_to_cpu(ns->head->effects->iocs[opcode]); if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) dev_warn_once(ctrl->device, "IO command:%02x has unusual effects:%08x\n", @@ -1107,17 +1077,14 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) */ effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } else { - if (ctrl->effects) - effects = le32_to_cpu(ctrl->effects->acs[opcode]); - effects |= nvme_known_admin_effects(opcode); + effects = le32_to_cpu(ctrl->effects->acs[opcode]); } return effects; } EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU); -static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - u8 opcode) +u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) { u32 effects = nvme_command_effects(ctrl, ns, opcode); @@ -1135,6 +1102,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } return effects; } +EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU); void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, struct nvme_command *cmd, int status) @@ -1176,17 +1144,6 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, } EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU); -int nvme_execute_passthru_rq(struct request *rq, u32 *effects) -{ - struct nvme_command *cmd = nvme_req(rq)->cmd; - struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; - struct nvme_ns *ns = rq->q->queuedata; - - *effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - return nvme_execute_rq(rq, false); -} -EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); - /* * Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1: * @@ -3122,6 +3079,62 @@ free_data: 
 	return ret;
 }
 
+static void nvme_init_known_nvm_effects(struct nvme_ctrl *ctrl)
+{
+	struct nvme_effects_log *log = ctrl->effects;
+
+	log->acs[nvme_admin_format_nvm] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC |
+						NVME_CMD_EFFECTS_NCC |
+						NVME_CMD_EFFECTS_CSE_MASK);
+	log->acs[nvme_admin_sanitize_nvm] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC |
+						NVME_CMD_EFFECTS_CSE_MASK);
+
+	/*
+	 * The spec says the result of a security receive command depends on
+	 * the previous security send command. As such, many vendors log this
+	 * command as one to be submitted only when no other commands to the
+	 * same namespace are outstanding. The intention is to tell the host to
+	 * prevent mixing security send and receive.
+	 *
+	 * This driver can only enforce such exclusive access against IO
+	 * queues, though. We are not readily able to enforce such a rule for
+	 * two commands to the admin queue, which is the only queue that
+	 * matters for this command.
+	 *
+	 * Rather than blindly freezing the IO queues for this effect that
+	 * doesn't even apply to IO, mask it off.
+	 */
+	log->acs[nvme_admin_security_recv] &= ~NVME_CMD_EFFECTS_CSE_MASK;
+
+	log->iocs[nvme_cmd_write] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
+	log->iocs[nvme_cmd_write_zeroes] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
+	log->iocs[nvme_cmd_write_uncor] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
+}
+
+static int nvme_init_effects(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	int ret = 0;
+
+	if (ctrl->effects)
+		return 0;
+
+	if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
+		ret = nvme_get_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (!ctrl->effects) {
+		ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL);
+		if (!ctrl->effects)
+			return -ENOMEM;
+		xa_store(&ctrl->cels, NVME_CSI_NVM, ctrl->effects, GFP_KERNEL);
+	}
+
+	nvme_init_known_nvm_effects(ctrl);
+	return 0;
+}
+
 static int nvme_init_identify(struct nvme_ctrl *ctrl)
 {
 	struct nvme_id_ctrl *id;
@@ -3135,12 +3148,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		return -EIO;
 	}
 
-	if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
-		ret = nvme_get_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects);
-		if (ret < 0)
-			goto out_free;
-	}
-
 	if (!(ctrl->ops->flags & NVME_F_FABRICS))
 		ctrl->cntlid = le16_to_cpu(id->cntlid);
@@ -3163,6 +3170,10 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		ret = nvme_init_subsystem(ctrl, id);
 		if (ret)
 			goto out_free;
+
+		ret = nvme_init_effects(ctrl, id);
+		if (ret)
+			goto out_free;
 	}
 	memcpy(ctrl->subsys->firmware_rev, id->fr,
 	       sizeof(ctrl->subsys->firmware_rev));
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index ce27276f552d..bbaa04a0c502 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -410,7 +410,14 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 
 	result = le32_to_cpu(res.u32);
 	ctrl->cntlid = result & 0xFFFF;
-	if ((result >> 16) & 0x3) {
+	if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
+		/* Secure concatenation
is not implemented */ + if (result & NVME_CONNECT_AUTHREQ_ASCR) { + dev_warn(ctrl->device, + "qid 0: secure concatenation is not supported\n"); + ret = NVME_SC_AUTH_REQUIRED; + goto out_free_data; + } /* Authentication required */ ret = nvme_auth_negotiate(ctrl, qid); if (ret) { @@ -500,6 +514,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) "qid %u: authentication failed\n", qid); } } +out_free_data: kfree(data); return ret; } diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 06f52db34be9..723e7d5b778f 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -219,6 +219,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u64 *result, unsigned timeout, unsigned int flags) { + struct nvme_ns *ns = q->queuedata; struct nvme_ctrl *ctrl; struct request *req; void *meta = NULL; @@ -241,8 +242,8 @@ static int nvme_submit_user_cmd(struct request_queue *q, bio = req->bio; ctrl = nvme_req(req)->ctrl; - ret = nvme_execute_passthru_rq(req, &effects); - + effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); + ret = nvme_execute_rq(req, false); if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); if (meta) @@ -554,7 +555,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_uring_data d; struct nvme_command c; struct request *req; - blk_opf_t rq_flags = 0; + blk_opf_t rq_flags = REQ_ALLOC_CACHE; blk_mq_req_flags_t blk_flags = 0; void *meta = NULL; int ret; @@ -590,7 +591,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, d.timeout_ms = READ_ONCE(cmd->timeout_ms); if (issue_flags & IO_URING_F_NONBLOCK) { - rq_flags = REQ_NOWAIT; + rq_flags |= REQ_NOWAIT; blk_flags = BLK_MQ_REQ_NOWAIT; } if (issue_flags & IO_URING_F_IOPOLL) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 424c8a467a0c..bf46f122e9e1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1070,7 +1070,8 @@ static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); -int nvme_execute_passthru_rq(struct request *rq, u32 *effects); +u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); +int nvme_execute_rq(struct request *rq, bool at_head); void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); @@ -1086,6 +1087,7 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) const unsigned char *nvme_get_error_status_str(u16 status); const unsigned char *nvme_get_opcode_str(u8 opcode); const unsigned char *nvme_get_admin_opcode_str(u8 opcode); +const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode); #else /* CONFIG_NVME_VERBOSE_ERRORS */ static inline const unsigned char *nvme_get_error_status_str(u16 status) { @@ -1099,6 +1101,18 @@ static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode) { return "Admin Cmd"; } + +static inline const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) +{ + return "Fabrics Cmd"; +} #endif /* CONFIG_NVME_VERBOSE_ERRORS */ +static inline const unsigned char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) +{ + if (opcode == nvme_fabrics_command) + return nvme_get_fabrics_opcode_str(fctype); + return qid ? 
nvme_get_opcode_str(opcode) : + nvme_get_admin_opcode_str(opcode); +} #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c11e0cfeef0f..5b95c94ee40f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -42,8 +42,9 @@ * These can be higher, but we need to ensure that any command doesn't * require an sg allocation that needs more than a page of data. */ -#define NVME_MAX_KB_SZ 4096 -#define NVME_MAX_SEGS 127 +#define NVME_MAX_KB_SZ 8192 +#define NVME_MAX_SEGS 128 +#define NVME_MAX_NR_ALLOCATIONS 5 static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0444); @@ -216,6 +217,11 @@ struct nvme_queue { struct completion delete_done; }; +union nvme_descriptor { + struct nvme_sgl_desc *sg_list; + __le64 *prp_list; +}; + /* * The nvme_iod describes the data in an I/O. * @@ -225,7 +231,6 @@ struct nvme_queue { struct nvme_iod { struct nvme_request req; struct nvme_command cmd; - bool use_sgl; bool aborted; s8 nr_allocations; /* PRP list pool allocations. 0 means small pool in use */ @@ -233,6 +238,7 @@ struct nvme_iod { dma_addr_t first_dma; dma_addr_t meta_dma; struct sg_table sgt; + union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS]; }; static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) @@ -387,16 +393,6 @@ static int nvme_pci_npages_prp(void) return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8); } -/* - * Calculates the number of pages needed for the SGL segments. For example a 4k - * page can accommodate 256 SGL descriptors. - */ -static int nvme_pci_npages_sgl(void) -{ - return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc), - NVME_CTRL_PAGE_SIZE); -} - static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -510,16 +506,10 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) spin_unlock(&nvmeq->sq_lock); } -static void **nvme_pci_iod_list(struct request *req) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - return (void **)(iod->sgt.sgl + blk_rq_nr_phys_segments(req)); -} - -static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) +static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req, + int nseg) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; - int nseg = blk_rq_nr_phys_segments(req); unsigned int avg_seg_size; avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); @@ -541,7 +531,7 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req) int i; for (i = 0; i < iod->nr_allocations; i++) { - __le64 *prp_list = nvme_pci_iod_list(req)[i]; + __le64 *prp_list = iod->list[i].prp_list; dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); dma_pool_free(dev->prp_page_pool, prp_list, dma_addr); @@ -549,22 +539,6 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req) } } -static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) -{ - const int last_sg = SGES_PER_PAGE - 1; - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - dma_addr_t dma_addr = iod->first_dma; - int i; - - for (i = 0; i < iod->nr_allocations; i++) { - struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i]; - dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr); - - dma_pool_free(dev->prp_page_pool, sg_list, dma_addr); - dma_addr = next_dma_addr; - } -} - static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -580,10 +554,11 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct 
request *req) dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); if (iod->nr_allocations == 0) - dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], + dma_pool_free(dev->prp_small_pool, iod->list[0].sg_list, + iod->first_dma); + else if (iod->nr_allocations == 1) + dma_pool_free(dev->prp_page_pool, iod->list[0].sg_list, iod->first_dma); - else if (iod->use_sgl) - nvme_free_sgls(dev, req); else nvme_free_prps(dev, req); mempool_free(iod->sgt.sgl, dev->iod_mempool); @@ -614,7 +589,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, u64 dma_addr = sg_dma_address(sg); int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1); __le64 *prp_list; - void **list = nvme_pci_iod_list(req); dma_addr_t prp_dma; int nprps, i; @@ -652,7 +626,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, iod->nr_allocations = -1; return BLK_STS_RESOURCE; } - list[0] = prp_list; + iod->list[0].prp_list = prp_list; iod->first_dma = prp_dma; i = 0; for (;;) { @@ -661,7 +635,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) goto free_prps; - list[iod->nr_allocations++] = prp_list; + iod->list[iod->nr_allocations++].prp_list = prp_list; prp_list[0] = old_prp_list[i - 1]; old_prp_list[i - 1] = cpu_to_le64(prp_dma); i = 1; @@ -706,13 +680,8 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, dma_addr_t dma_addr, int entries) { sge->addr = cpu_to_le64(dma_addr); - if (entries < SGES_PER_PAGE) { - sge->length = cpu_to_le32(entries * sizeof(*sge)); - sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; - } else { - sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE); - sge->type = NVME_SGL_FMT_SEG_DESC << 4; - } + sge->length = cpu_to_le32(entries * sizeof(*sge)); + sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; } static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, @@ -748,34 +717,16 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, return BLK_STS_RESOURCE; } - nvme_pci_iod_list(req)[0] = sg_list; + iod->list[0].sg_list = sg_list; iod->first_dma = sgl_dma; nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries); - do { - if (i == SGES_PER_PAGE) { - struct nvme_sgl_desc *old_sg_desc = sg_list; - struct nvme_sgl_desc *link = &old_sg_desc[i - 1]; - - sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); - if (!sg_list) - goto free_sgls; - - i = 0; - nvme_pci_iod_list(req)[iod->nr_allocations++] = sg_list; - sg_list[i++] = *link; - nvme_pci_sgl_set_seg(link, sgl_dma, entries); - } - nvme_pci_sgl_set_data(&sg_list[i++], sg); sg = sg_next(sg); } while (--entries > 0); return BLK_STS_OK; -free_sgls: - nvme_free_sgls(dev, req); - return BLK_STS_RESOURCE; } static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, @@ -857,8 +808,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, goto out_free_sg; } - iod->use_sgl = nvme_pci_use_sgls(dev, req); - if (iod->use_sgl) + if (nvme_pci_use_sgls(dev, req, iod->sgt.nents)) ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); @@ -2706,11 +2656,8 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev) { - size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl()); - size_t alloc_size = sizeof(__le64 *) * npages + - sizeof(struct scatterlist) * NVME_MAX_SEGS; + size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS; - WARN_ON_ONCE(alloc_size > PAGE_SIZE); dev->iod_mempool = 
mempool_create_node(1, mempool_kmalloc, mempool_kfree, (void *)alloc_size, GFP_KERNEL, @@ -3538,8 +3485,9 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); - BUILD_BUG_ON(DIV_ROUND_UP(nvme_pci_npages_prp(), NVME_CTRL_PAGE_SIZE) > - S8_MAX); + BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE); + BUILD_BUG_ON(sizeof(struct scatterlist) * NVME_MAX_SEGS > PAGE_SIZE); + BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS); return pci_register_driver(&nvme_driver); } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8cedc1ef496c..d6100a787d39 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2282,10 +2282,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; struct nvme_tcp_cmd_pdu *pdu = req->pdu; + u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype; + int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, - "queue %d: timeout request %#x type %d\n", - nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); + "queue %d: timeout cid %#x type %d opcode %#x (%s)\n", + nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type, + opc, nvme_opcode_str(qid, opc, fctype)); if (ctrl->state != NVME_CTRL_LIVE) { /* diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 6a54ed6fb121..80099df37314 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -840,7 +840,7 @@ void nvmet_execute_set_features(struct nvmet_req *req) u16 nsqr; u16 ncqr; - if (!nvmet_check_transfer_len(req, 0)) + if (!nvmet_check_data_len_lte(req, 0)) return; switch (cdw10 & 0xff) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 871c4f32f443..2d068439b129 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -73,13 +73,6 @@ err: return ret; } -static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg) -{ - bv->bv_page = sg_page(sg); - bv->bv_offset = sg->offset; - bv->bv_len = sg->length; -} - static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, unsigned long nr_segs, size_t count, int ki_flags) { @@ -146,7 +139,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) memset(&req->f.iocb, 0, sizeof(struct kiocb)); for_each_sg(req->sg, sg, req->sg_cnt, i) { - nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg); + bvec_set_page(&req->f.bvec[bv_cnt], sg_page(sg), sg->length, + sg->offset); len += req->f.bvec[bv_cnt].bv_len; total_len += req->f.bvec[bv_cnt].bv_len; bv_cnt++; diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index adc0958755d6..511c980d538d 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -216,11 +216,12 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); struct request *rq = req->p.rq; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; + struct nvme_ns *ns = rq->q->queuedata; u32 effects; int status; - status = nvme_execute_passthru_rq(rq, &effects); - + effects = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode); + status = nvme_execute_rq(rq, false); if (status == NVME_SC_SUCCESS && req->cmd->common.opcode == nvme_admin_identify) { switch (req->cmd->identify.cns) { diff 
--git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index cc05c094de22..c5759eb503d0 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -321,9 +321,8 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) while (length) { u32 iov_len = min_t(u32, length, sg->length - sg_offset); - iov->bv_page = sg_page(sg); - iov->bv_len = sg->length; - iov->bv_offset = sg->offset + sg_offset; + bvec_set_page(iov, sg_page(sg), sg->length, + sg->offset + sg_offset); length -= iov_len; sg = sg_next(sg); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 1254cf57e008..7e4292d88016 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -254,8 +254,7 @@ static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req) { unsigned int sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba); - return bdev_nr_zones(req->ns->bdev) - - (sect >> ilog2(bdev_zone_sectors(req->ns->bdev))); + return bdev_nr_zones(req->ns->bdev) - bdev_zone_no(req->ns->bdev, sect); } static unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 bufsize) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 5a6d9c15395f..a9c2a8d76c45 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3978,7 +3978,7 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, ccw = cqr->cpaddr; ccw->cmd_code = CCW_CMD_RDC; - ccw->cda = (__u32)(addr_t) cqr->data; + ccw->cda = (__u32)virt_to_phys(cqr->data); ccw->flags = 0; ccw->count = rdc_buffer_size; cqr->startdev = device; @@ -4022,8 +4022,7 @@ char *dasd_get_sense(struct irb *irb) if (scsw_is_tm(&irb->scsw) && (irb->scsw.tm.fcxs == 0x01)) { if (irb->scsw.tm.tcw) - tsb = tcw_get_tsb((struct tcw *)(unsigned long) - irb->scsw.tm.tcw); + tsb = tcw_get_tsb(phys_to_virt(irb->scsw.tm.tcw)); if (tsb && tsb->length == 64 && tsb->flags) switch (tsb->flags & 0x07) { case 1: /* tsa_iostat */ diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 81d283b3cd3b..9fd36c468706 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -220,7 +220,7 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier) memset(ccw, 0, sizeof(struct ccw1)); ccw->cmd_code = CCW_CMD_DCTL; ccw->count = 4; - ccw->cda = (__u32)(addr_t) DCTL_data; + ccw->cda = (__u32)virt_to_phys(DCTL_data); dctl_cqr->flags = erp->flags; dctl_cqr->function = dasd_3990_erp_DCTL; dctl_cqr->refers = erp; @@ -1714,7 +1714,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense) ccw->cmd_code = DASD_ECKD_CCW_DEFINE_EXTENT; ccw->flags = CCW_FLAG_CC; ccw->count = 16; - ccw->cda = (__u32)(addr_t) DE_data; + ccw->cda = (__u32)virt_to_phys(DE_data); /* create LO ccw */ ccw++; @@ -1722,7 +1722,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense) ccw->cmd_code = DASD_ECKD_CCW_LOCATE_RECORD; ccw->flags = CCW_FLAG_CC; ccw->count = 16; - ccw->cda = (__u32)(addr_t) LO_data; + ccw->cda = (__u32)virt_to_phys(LO_data); /* TIC to the failed ccw */ ccw++; @@ -2419,7 +2419,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr) tcw = erp->cpaddr; tsb = (struct tsb *) &tcw[1]; *tcw = *((struct tcw *)cqr->cpaddr); - tcw->tsb = (long)tsb; + tcw->tsb = virt_to_phys(tsb); } else if (ccw->cmd_code == DASD_ECKD_CCW_PSF) { /* PSF cannot be chained from NOOP/TIC */ erp->cpaddr = cqr->cpaddr; @@ -2430,7 +2430,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req 
*cqr) ccw->flags = CCW_FLAG_CC; ccw++; ccw->cmd_code = CCW_CMD_TIC; - ccw->cda = (long)(cqr->cpaddr); + ccw->cda = (__u32)virt_to_phys(cqr->cpaddr); } erp->flags = cqr->flags; diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index b6b938aa6615..c9740ae88d1a 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -443,7 +443,7 @@ static int read_unit_address_configuration(struct dasd_device *device, ccw->cmd_code = DASD_ECKD_CCW_PSF; ccw->count = sizeof(struct dasd_psf_prssd_data); ccw->flags |= CCW_FLAG_CC; - ccw->cda = (__u32)(addr_t) prssdp; + ccw->cda = (__u32)virt_to_phys(prssdp); /* Read Subsystem Data - feature codes */ memset(lcu->uac, 0, sizeof(*(lcu->uac))); @@ -451,7 +451,7 @@ static int read_unit_address_configuration(struct dasd_device *device, ccw++; ccw->cmd_code = DASD_ECKD_CCW_RSSD; ccw->count = sizeof(*(lcu->uac)); - ccw->cda = (__u32)(addr_t) lcu->uac; + ccw->cda = (__u32)virt_to_phys(lcu->uac); cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; @@ -747,7 +747,7 @@ static int reset_summary_unit_check(struct alias_lcu *lcu, ccw->cmd_code = DASD_ECKD_CCW_RSCK; ccw->flags = CCW_FLAG_SLI; ccw->count = 16; - ccw->cda = (__u32)(addr_t) cqr->data; + ccw->cda = (__u32)virt_to_phys(cqr->data); ((char *)cqr->data)[0] = reason; clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 5d0b9991e91a..1a69f97e88fb 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -288,7 +288,7 @@ define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk, ccw->cmd_code = DASD_ECKD_CCW_DEFINE_EXTENT; ccw->flags = 0; ccw->count = 16; - ccw->cda = (__u32)__pa(data); + ccw->cda = (__u32)virt_to_phys(data); } memset(data, 0, sizeof(struct DE_eckd_data)); @@ -398,7 +398,7 @@ static void locate_record_ext(struct ccw1 *ccw, struct LRE_eckd_data *data, ccw->count = 22; else ccw->count = 20; - ccw->cda = (__u32)__pa(data); + ccw->cda = (__u32)virt_to_phys(data); } memset(data, 0, sizeof(*data)); @@ -544,11 +544,11 @@ static int prefix_LRE(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata, ccw->flags = 0; if (cmd == DASD_ECKD_CCW_WRITE_FULL_TRACK) { ccw->count = sizeof(*pfxdata) + 2; - ccw->cda = (__u32) __pa(pfxdata); + ccw->cda = (__u32)virt_to_phys(pfxdata); memset(pfxdata, 0, sizeof(*pfxdata) + 2); } else { ccw->count = sizeof(*pfxdata); - ccw->cda = (__u32) __pa(pfxdata); + ccw->cda = (__u32)virt_to_phys(pfxdata); memset(pfxdata, 0, sizeof(*pfxdata)); } @@ -615,7 +615,7 @@ locate_record(struct ccw1 *ccw, struct LO_eckd_data *data, unsigned int trk, ccw->cmd_code = DASD_ECKD_CCW_LOCATE_RECORD; ccw->flags = 0; ccw->count = 16; - ccw->cda = (__u32) __pa(data); + ccw->cda = (__u32)virt_to_phys(data); memset(data, 0, sizeof(struct LO_eckd_data)); sector = 0; @@ -830,7 +830,7 @@ static void dasd_eckd_fill_rcd_cqr(struct dasd_device *device, ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_RCD; ccw->flags = 0; - ccw->cda = (__u32)(addr_t)rcd_buffer; + ccw->cda = (__u32)virt_to_phys(rcd_buffer); ccw->count = DASD_ECKD_RCD_DATA_SIZE; cqr->magic = DASD_ECKD_MAGIC; @@ -858,7 +858,7 @@ static void read_conf_cb(struct dasd_ccw_req *cqr, void *data) if (cqr->status != DASD_CQR_DONE) { ccw = cqr->cpaddr; - rcd_buffer = (__u8 *)((addr_t) ccw->cda); + rcd_buffer = phys_to_virt(ccw->cda); memset(rcd_buffer, 0, sizeof(*rcd_buffer)); rcd_buffer[0] = 0xE5; @@ -1547,7 +1547,7 @@ static int dasd_eckd_read_features(struct dasd_device 
*device) ccw->cmd_code = DASD_ECKD_CCW_PSF; ccw->count = sizeof(struct dasd_psf_prssd_data); ccw->flags |= CCW_FLAG_CC; - ccw->cda = (__u32)(addr_t) prssdp; + ccw->cda = (__u32)virt_to_phys(prssdp); /* Read Subsystem Data - feature codes */ features = (struct dasd_rssd_features *) (prssdp + 1); @@ -1556,7 +1556,7 @@ static int dasd_eckd_read_features(struct dasd_device *device) ccw++; ccw->cmd_code = DASD_ECKD_CCW_RSSD; ccw->count = sizeof(struct dasd_rssd_features); - ccw->cda = (__u32)(addr_t) features; + ccw->cda = (__u32)virt_to_phys(features); cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; @@ -1616,7 +1616,7 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_PSF; ccw->count = sizeof(*prssdp); ccw->flags |= CCW_FLAG_CC; - ccw->cda = (__u32)(addr_t)prssdp; + ccw->cda = (__u32)virt_to_phys(prssdp); /* Read Subsystem Data - Volume Storage Query */ vsq = (struct dasd_rssd_vsq *)(prssdp + 1); @@ -1626,7 +1626,7 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_RSSD; ccw->count = sizeof(*vsq); ccw->flags |= CCW_FLAG_SLI; - ccw->cda = (__u32)(addr_t)vsq; + ccw->cda = (__u32)virt_to_phys(vsq); cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; @@ -1801,7 +1801,7 @@ static int dasd_eckd_read_ext_pool_info(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_PSF; ccw->count = sizeof(*prssdp); ccw->flags |= CCW_FLAG_CC; - ccw->cda = (__u32)(addr_t)prssdp; + ccw->cda = (__u32)virt_to_phys(prssdp); lcq = (struct dasd_rssd_lcq *)(prssdp + 1); memset(lcq, 0, sizeof(*lcq)); @@ -1810,7 +1810,7 @@ static int dasd_eckd_read_ext_pool_info(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_RSSD; ccw->count = sizeof(*lcq); ccw->flags |= CCW_FLAG_SLI; - ccw->cda = (__u32)(addr_t)lcq; + ccw->cda = (__u32)virt_to_phys(lcq); cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; @@ -1907,7 +1907,7 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device, } ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_PSF; - ccw->cda = (__u32)(addr_t)psf_ssc_data; + ccw->cda = (__u32)virt_to_phys(psf_ssc_data); ccw->count = 66; cqr->startdev = device; @@ -2262,7 +2262,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_READ_COUNT; ccw->flags = 0; ccw->count = 8; - ccw->cda = (__u32)(addr_t) count_data; + ccw->cda = (__u32)virt_to_phys(count_data); ccw++; count_data++; } @@ -2276,7 +2276,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_READ_COUNT; ccw->flags = 0; ccw->count = 8; - ccw->cda = (__u32)(addr_t) count_data; + ccw->cda = (__u32)virt_to_phys(count_data); cqr->block = NULL; cqr->startdev = device; @@ -2647,7 +2647,7 @@ dasd_eckd_build_check(struct dasd_device *base, struct format_data_t *fdata, ccw->cmd_code = DASD_ECKD_CCW_READ_COUNT; ccw->flags = CCW_FLAG_SLI; ccw->count = 8; - ccw->cda = (__u32)(addr_t) fmt_buffer; + ccw->cda = (__u32)virt_to_phys(fmt_buffer); ccw++; fmt_buffer++; } @@ -2857,7 +2857,7 @@ dasd_eckd_build_format(struct dasd_device *base, struct dasd_device *startdev, ccw->cmd_code = DASD_ECKD_CCW_WRITE_RECORD_ZERO; ccw->flags = CCW_FLAG_SLI; ccw->count = 8; - ccw->cda = (__u32)(addr_t) ect; + ccw->cda = (__u32)virt_to_phys(ect); ccw++; } if ((intensity & ~0x08) & 0x04) { /* erase track */ @@ -2872,7 +2872,7 @@ dasd_eckd_build_format(struct dasd_device *base, struct dasd_device *startdev, ccw->cmd_code = DASD_ECKD_CCW_WRITE_CKD; ccw->flags = CCW_FLAG_SLI; ccw->count 
= 8; - ccw->cda = (__u32)(addr_t) ect; + ccw->cda = (__u32)virt_to_phys(ect); } else { /* write remaining records */ for (i = 0; i < rpt; i++) { ect = (struct eckd_count *) data; @@ -2907,7 +2907,7 @@ dasd_eckd_build_format(struct dasd_device *base, struct dasd_device *startdev, DASD_ECKD_CCW_WRITE_CKD_MT; ccw->flags = CCW_FLAG_SLI; ccw->count = 8; - ccw->cda = (__u32)(addr_t) ect; + ccw->cda = (__u32)virt_to_phys(ect); ccw++; } } @@ -3821,7 +3821,7 @@ dasd_eckd_dso_ras(struct dasd_device *device, struct dasd_block *block, } ccw = cqr->cpaddr; - ccw->cda = (__u32)(addr_t)cqr->data; + ccw->cda = (__u32)virt_to_phys(cqr->data); ccw->cmd_code = DASD_ECKD_CCW_DSO; ccw->count = size; @@ -4090,11 +4090,11 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( ccw->cmd_code = rcmd; ccw->count = count; if (idal_is_needed(dst, blksize)) { - ccw->cda = (__u32)(addr_t) idaws; + ccw->cda = (__u32)virt_to_phys(idaws); ccw->flags = CCW_FLAG_IDA; idaws = idal_create_words(idaws, dst, blksize); } else { - ccw->cda = (__u32)(addr_t) dst; + ccw->cda = (__u32)virt_to_phys(dst); ccw->flags = 0; } ccw++; @@ -4228,7 +4228,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( ccw[-1].flags |= CCW_FLAG_CC; ccw->cmd_code = cmd; ccw->count = len_to_track_end; - ccw->cda = (__u32)(addr_t)idaws; + ccw->cda = (__u32)virt_to_phys(idaws); ccw->flags = CCW_FLAG_IDA; ccw++; recid += count; @@ -4244,7 +4244,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( * idaw ends */ if (!idaw_dst) { - if (__pa(dst) & (IDA_BLOCK_SIZE-1)) { + if ((__u32)virt_to_phys(dst) & (IDA_BLOCK_SIZE - 1)) { dasd_sfree_request(cqr, startdev); return ERR_PTR(-ERANGE); } else @@ -4264,7 +4264,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( * idal_create_words will handle cases where idaw_len * is larger then IDA_BLOCK_SIZE */ - if (!(__pa(idaw_dst + idaw_len) & (IDA_BLOCK_SIZE-1))) + if (!((__u32)virt_to_phys(idaw_dst + idaw_len) & (IDA_BLOCK_SIZE - 1))) end_idaw = 1; /* We also need to end the idaw at track end */ if (!len_to_track_end) { @@ -4817,7 +4817,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, ccw->count = 57326; /* 64k map to one track */ len_to_track_end = 65536 - start_padding_sectors * 512; - ccw->cda = (__u32)(addr_t)idaws; + ccw->cda = (__u32)virt_to_phys(idaws); ccw->flags |= CCW_FLAG_IDA; ccw->flags |= CCW_FLAG_SLI; ccw++; @@ -4836,7 +4836,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, ccw->count = 57326; /* 64k map to one track */ len_to_track_end = 65536; - ccw->cda = (__u32)(addr_t)idaws; + ccw->cda = (__u32)virt_to_phys(idaws); ccw->flags |= CCW_FLAG_IDA; ccw->flags |= CCW_FLAG_SLI; ccw++; @@ -4893,9 +4893,9 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) ccw++; if (dst) { if (ccw->flags & CCW_FLAG_IDA) - cda = *((char **)((addr_t) ccw->cda)); + cda = *((char **)phys_to_virt(ccw->cda)); else - cda = (char *)((addr_t) ccw->cda); + cda = phys_to_virt(ccw->cda); if (dst != cda) { if (rq_data_dir(req) == READ) memcpy(dst, cda, bv.bv_len); @@ -5045,7 +5045,7 @@ dasd_eckd_release(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_RELEASE; ccw->flags |= CCW_FLAG_SLI; ccw->count = 32; - ccw->cda = (__u32)(addr_t) cqr->data; + ccw->cda = (__u32)virt_to_phys(cqr->data); cqr->startdev = device; cqr->memdev = device; clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); @@ -5100,7 +5100,7 @@ dasd_eckd_reserve(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_RESERVE; ccw->flags |= 
CCW_FLAG_SLI; ccw->count = 32; - ccw->cda = (__u32)(addr_t) cqr->data; + ccw->cda = (__u32)virt_to_phys(cqr->data); cqr->startdev = device; cqr->memdev = device; clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); @@ -5154,7 +5154,7 @@ dasd_eckd_steal_lock(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_SLCK; ccw->flags |= CCW_FLAG_SLI; ccw->count = 32; - ccw->cda = (__u32)(addr_t) cqr->data; + ccw->cda = (__u32)virt_to_phys(cqr->data); cqr->startdev = device; cqr->memdev = device; clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); @@ -5215,7 +5215,7 @@ static int dasd_eckd_snid(struct dasd_device *device, ccw->cmd_code = DASD_ECKD_CCW_SNID; ccw->flags |= CCW_FLAG_SLI; ccw->count = 12; - ccw->cda = (__u32)(addr_t) cqr->data; + ccw->cda = (__u32)virt_to_phys(cqr->data); cqr->startdev = device; cqr->memdev = device; clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); @@ -5282,7 +5282,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp) ccw->cmd_code = DASD_ECKD_CCW_PSF; ccw->count = sizeof(struct dasd_psf_prssd_data); ccw->flags |= CCW_FLAG_CC; - ccw->cda = (__u32)(addr_t) prssdp; + ccw->cda = (__u32)virt_to_phys(prssdp); /* Read Subsystem Data - Performance Statistics */ stats = (struct dasd_rssd_perf_stats_t *) (prssdp + 1); @@ -5291,7 +5291,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp) ccw++; ccw->cmd_code = DASD_ECKD_CCW_RSSD; ccw->count = sizeof(struct dasd_rssd_perf_stats_t); - ccw->cda = (__u32)(addr_t) stats; + ccw->cda = (__u32)virt_to_phys(stats); cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; @@ -5435,7 +5435,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp) ccw->cmd_code = DASD_ECKD_CCW_PSF; ccw->count = usrparm.psf_data_len; ccw->flags |= CCW_FLAG_CC; - ccw->cda = (__u32)(addr_t) psf_data; + ccw->cda = (__u32)virt_to_phys(psf_data); ccw++; @@ -5443,7 +5443,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp) ccw->cmd_code = DASD_ECKD_CCW_RSSD; ccw->count = usrparm.rssd_result_len; ccw->flags = CCW_FLAG_SLI ; - ccw->cda = (__u32)(addr_t) rssd_result; + ccw->cda = (__u32)virt_to_phys(rssd_result); rc = dasd_sleep_on(cqr); if (rc) @@ -5512,9 +5512,9 @@ dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) /* get pointer to data (consider IDALs) */ if (from->flags & CCW_FLAG_IDA) - datap = (char *) *((addr_t *) (addr_t) from->cda); + datap = (char *)*((addr_t *)phys_to_virt(from->cda)); else - datap = (char *) ((addr_t) from->cda); + datap = phys_to_virt(from->cda); /* dump data (max 128 bytes) */ for (count = 0; count < from->count && count < 128; count++) { @@ -5585,7 +5585,7 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, len += sprintf(page + len, PRINTK_HEADER " device %s: Failing CCW: %p\n", dev_name(&device->cdev->dev), - (void *) (addr_t) irb->scsw.cmd.cpa); + phys_to_virt(irb->scsw.cmd.cpa)); if (irb->esw.esw0.erw.cons) { for (sl = 0; sl < 4; sl++) { len += sprintf(page + len, PRINTK_HEADER @@ -5632,8 +5632,7 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, /* print failing CCW area (maximum 4) */ /* scsw->cda is either valid or zero */ from = ++to; - fail = (struct ccw1 *)(addr_t) - irb->scsw.cmd.cpa; /* failing CCW */ + fail = phys_to_virt(irb->scsw.cmd.cpa); /* failing CCW */ if (from < fail - 2) { from = fail - 2; /* there is a gap - print header */ printk(KERN_ERR PRINTK_HEADER "......\n"); @@ -5687,13 +5686,12 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, len += sprintf(page + 
len, PRINTK_HEADER " device %s: Failing TCW: %p\n", dev_name(&device->cdev->dev), - (void *) (addr_t) irb->scsw.tm.tcw); + phys_to_virt(irb->scsw.tm.tcw)); tsb = NULL; sense = NULL; if (irb->scsw.tm.tcw && (irb->scsw.tm.fcxs & 0x01)) - tsb = tcw_get_tsb( - (struct tcw *)(unsigned long)irb->scsw.tm.tcw); + tsb = tcw_get_tsb(phys_to_virt(irb->scsw.tm.tcw)); if (tsb) { len += sprintf(page + len, PRINTK_HEADER @@ -5917,7 +5915,7 @@ retry: ccw->count = sizeof(struct dasd_psf_prssd_data); ccw->flags |= CCW_FLAG_CC; ccw->flags |= CCW_FLAG_SLI; - ccw->cda = (__u32)(addr_t) prssdp; + ccw->cda = (__u32)virt_to_phys(prssdp); /* Read Subsystem Data - message buffer */ message_buf = (struct dasd_rssd_messages *) (prssdp + 1); @@ -5927,7 +5925,7 @@ retry: ccw->cmd_code = DASD_ECKD_CCW_RSSD; ccw->count = sizeof(struct dasd_rssd_messages); ccw->flags |= CCW_FLAG_SLI; - ccw->cda = (__u32)(addr_t) message_buf; + ccw->cda = (__u32)virt_to_phys(message_buf); cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; @@ -6008,14 +6006,14 @@ static int dasd_eckd_query_host_access(struct dasd_device *device, ccw->count = sizeof(struct dasd_psf_prssd_data); ccw->flags |= CCW_FLAG_CC; ccw->flags |= CCW_FLAG_SLI; - ccw->cda = (__u32)(addr_t) prssdp; + ccw->cda = (__u32)virt_to_phys(prssdp); /* Read Subsystem Data - query host access */ ccw++; ccw->cmd_code = DASD_ECKD_CCW_RSSD; ccw->count = sizeof(struct dasd_psf_query_host_access); ccw->flags |= CCW_FLAG_SLI; - ccw->cda = (__u32)(addr_t) host_access; + ccw->cda = (__u32)virt_to_phys(host_access); cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; @@ -6351,7 +6349,7 @@ dasd_eckd_psf_cuir_response(struct dasd_device *device, int response, psf_cuir->ssid = device->path[pos].ssid; ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_PSF; - ccw->cda = (__u32)(addr_t)psf_cuir; + ccw->cda = (__u32)virt_to_phys(psf_cuir); ccw->flags = CCW_FLAG_SLI; ccw->count = sizeof(struct dasd_psf_cuir_response); @@ -6956,8 +6954,10 @@ dasd_eckd_init(void) return -ENOMEM; dasd_vol_info_req = kmalloc(sizeof(*dasd_vol_info_req), GFP_KERNEL | GFP_DMA); - if (!dasd_vol_info_req) + if (!dasd_vol_info_req) { + kfree(dasd_reserve_req); return -ENOMEM; + } pe_handler_worker = kmalloc(sizeof(*pe_handler_worker), GFP_KERNEL | GFP_DMA); if (!pe_handler_worker) { diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index d4d31cd11d26..a4cc772208a6 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -491,7 +491,7 @@ int dasd_eer_enable(struct dasd_device *device) ccw->cmd_code = DASD_ECKD_CCW_SNSS; ccw->count = SNSS_DATA_SIZE; ccw->flags = 0; - ccw->cda = (__u32)(addr_t) cqr->data; + ccw->cda = (__u32)virt_to_phys(cqr->data); cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index cddfb01a3dca..bcb67fa747a7 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -83,7 +83,7 @@ define_extent(struct ccw1 * ccw, struct DE_fba_data *data, int rw, ccw->cmd_code = DASD_FBA_CCW_DEFINE_EXTENT; ccw->flags = 0; ccw->count = 16; - ccw->cda = (__u32) __pa(data); + ccw->cda = (__u32)virt_to_phys(data); memset(data, 0, sizeof (struct DE_fba_data)); if (rw == WRITE) (data->mask).perm = 0x0; @@ -103,7 +103,7 @@ locate_record(struct ccw1 * ccw, struct LO_fba_data *data, int rw, ccw->cmd_code = DASD_FBA_CCW_LOCATE; ccw->flags = 0; ccw->count = 8; - ccw->cda = (__u32) __pa(data); + ccw->cda = (__u32)virt_to_phys(data); memset(data, 0, 
sizeof (struct LO_fba_data)); if (rw == WRITE) data->operation.cmd = 0x5; @@ -262,7 +262,7 @@ static void ccw_write_zero(struct ccw1 *ccw, int count) ccw->cmd_code = DASD_FBA_CCW_WRITE; ccw->flags |= CCW_FLAG_SLI; ccw->count = count; - ccw->cda = (__u32) (addr_t) dasd_fba_zero_page; + ccw->cda = (__u32)virt_to_phys(dasd_fba_zero_page); } /* @@ -528,11 +528,11 @@ static struct dasd_ccw_req *dasd_fba_build_cp_regular( ccw->cmd_code = cmd; ccw->count = block->bp_block; if (idal_is_needed(dst, blksize)) { - ccw->cda = (__u32)(addr_t) idaws; + ccw->cda = (__u32)virt_to_phys(idaws); ccw->flags = CCW_FLAG_IDA; idaws = idal_create_words(idaws, dst, blksize); } else { - ccw->cda = (__u32)(addr_t) dst; + ccw->cda = (__u32)virt_to_phys(dst); ccw->flags = 0; } ccw++; @@ -590,9 +590,9 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) ccw++; if (dst) { if (ccw->flags & CCW_FLAG_IDA) - cda = *((char **)((addr_t) ccw->cda)); + cda = *((char **)phys_to_virt(ccw->cda)); else - cda = (char *)((addr_t) ccw->cda); + cda = phys_to_virt(ccw->cda); if (dst != cda) { if (rq_data_dir(req) == READ) memcpy(dst, cda, bv.bv_len); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index c0f85ffb2b62..c09f2e053bf8 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -864,10 +864,6 @@ dcssblk_submit_bio(struct bio *bio) unsigned long source_addr; unsigned long bytes_done; - bio = bio_split_to_limits(bio); - if (!bio) - return; - bytes_done = 0; dev_info = bio->bi_bdev->bd_disk->private_data; if (dev_info == NULL) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 47dafe6b8a66..277960decc10 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -831,6 +831,19 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9)); } +static void *sd_set_special_bvec(struct request *rq, unsigned int data_len) +{ + struct page *page; + + page = mempool_alloc(sd_page_pool, GFP_ATOMIC); + if (!page) + return NULL; + clear_highpage(page); + bvec_set_page(&rq->special_vec, page, data_len, 0); + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; + return bvec_virt(&rq->special_vec); +} + static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) { struct scsi_device *sdp = cmd->device; @@ -841,19 +854,14 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) unsigned int data_len = 24; char *buf; - rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); - if (!rq->special_vec.bv_page) + buf = sd_set_special_bvec(rq, data_len); + if (!buf) return BLK_STS_RESOURCE; - clear_highpage(rq->special_vec.bv_page); - rq->special_vec.bv_offset = 0; - rq->special_vec.bv_len = data_len; - rq->rq_flags |= RQF_SPECIAL_PAYLOAD; cmd->cmd_len = 10; cmd->cmnd[0] = UNMAP; cmd->cmnd[8] = 24; - buf = bvec_virt(&rq->special_vec); put_unaligned_be16(6 + 16, &buf[0]); put_unaligned_be16(16, &buf[2]); put_unaligned_be64(lba, &buf[8]); @@ -876,13 +884,8 @@ static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); u32 data_len = sdp->sector_size; - rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); - if (!rq->special_vec.bv_page) + if (!sd_set_special_bvec(rq, data_len)) return BLK_STS_RESOURCE; - clear_highpage(rq->special_vec.bv_page); - rq->special_vec.bv_offset = 0; - rq->special_vec.bv_len = data_len; - rq->rq_flags |= RQF_SPECIAL_PAYLOAD; cmd->cmd_len = 16; cmd->cmnd[0] = WRITE_SAME_16; @@ -908,13 
+911,8 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); u32 data_len = sdp->sector_size; - rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC); - if (!rq->special_vec.bv_page) + if (!sd_set_special_bvec(rq, data_len)) return BLK_STS_RESOURCE; - clear_highpage(rq->special_vec.bv_page); - rq->special_vec.bv_offset = 0; - rq->special_vec.bv_len = data_len; - rq->rq_flags |= RQF_SPECIAL_PAYLOAD; cmd->cmd_len = 10; cmd->cmnd[0] = WRITE_SAME; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index fd584111da45..ce0e000b74fc 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -281,10 +281,8 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; for_each_sg(sgl, sg, sgl_nents, i) { - aio_cmd->bvecs[i].bv_page = sg_page(sg); - aio_cmd->bvecs[i].bv_len = sg->length; - aio_cmd->bvecs[i].bv_offset = sg->offset; - + bvec_set_page(&aio_cmd->bvecs[i], sg_page(sg), sg->length, + sg->offset); len += sg->length; } @@ -329,10 +327,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd, } for_each_sg(sgl, sg, sgl_nents, i) { - bvec[i].bv_page = sg_page(sg); - bvec[i].bv_len = sg->length; - bvec[i].bv_offset = sg->offset; - + bvec_set_page(&bvec[i], sg_page(sg), sg->length, sg->offset); len += sg->length; } @@ -465,10 +460,9 @@ fd_execute_write_same(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; for (i = 0; i < nolb; i++) { - bvec[i].bv_page = sg_page(&cmd->t_data_sg[0]); - bvec[i].bv_len = cmd->t_data_sg[0].length; - bvec[i].bv_offset = cmd->t_data_sg[0].offset; - + bvec_set_page(&bvec[i], sg_page(&cmd->t_data_sg[0]), + cmd->t_data_sg[0].length, + cmd->t_data_sg[0].offset); len += se_dev->dev_attrib.block_size; } diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 33eb941fcf15..a1e27da54481 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1126,9 +1126,8 @@ static int iotlb_translate(const struct vringh *vrh, size = map->size - addr + map->start; pa = map->addr + addr - map->start; pfn = pa >> PAGE_SHIFT; - iov[ret].bv_page = pfn_to_page(pfn); - iov[ret].bv_len = min(len - s, size); - iov[ret].bv_offset = pa & (PAGE_SIZE - 1); + bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size), + pa & (PAGE_SIZE - 1)); s += size; addr += size; ++ret; |
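Several of the conversions in this diff (the nvme discard setup, the nvmet file and tcp paths, target_core_file, vringh, and sd) replace open-coded bio_vec field assignments with the new bvec initialization helpers. As a rough sketch of what those helpers amount to, modelled on the include/linux/bvec.h additions rather than copied from them:

/*
 * Sketch of the new helpers' behaviour; the canonical definitions live in
 * include/linux/bvec.h.
 */
static inline void bvec_set_page(struct bio_vec *bv, struct page *page,
				 unsigned int len, unsigned int offset)
{
	bv->bv_page = page;
	bv->bv_len = len;
	bv->bv_offset = offset;
}

/* For directly mapped kernel memory; not valid for vmalloc or highmem pages. */
static inline void bvec_set_virt(struct bio_vec *bv, void *vaddr,
				 unsigned int len)
{
	bvec_set_page(bv, virt_to_page(vaddr), len, offset_in_page(vaddr));
}

Centralizing the three assignments keeps callers from forgetting one of the fields and gives a single place to adjust if the bio_vec layout ever changes.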
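With nvme_execute_passthru_rq() removed, its two callers (nvme_submit_user_cmd() and the nvmet passthru worker) open-code the split sequence using the newly exported nvme_passthru_start() and nvme_execute_rq(). A condensed sketch of the resulting calling pattern, with error handling and the completion details elided; it assumes the callers still invoke nvme_passthru_end() once the command has finished, as they did before:

	u32 effects;
	int ret;

	/* freeze/quiesce queues according to the command's effects log */
	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);

	/* issue the request and translate the completion into a status/errno */
	ret = nvme_execute_rq(req, false);

	/* ... copy out result and metadata, tear down the request ... */

	if (effects)
		nvme_passthru_end(ctrl, effects, cmd, ret);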
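The nvme-pci change doubles NVME_MAX_KB_SZ to 8192 and bounds the per-request descriptor array with NVME_MAX_NR_ALLOCATIONS 5. A quick check of that bound, assuming the existing nvme_pci_npages_prp() worst case of NVME_MAX_KB_SZ plus one extra controller page for an unaligned start, and a 4 KiB NVME_CTRL_PAGE_SIZE:

	max_bytes = 8192 KiB + 4 KiB              = 8,392,704 bytes
	nprps     = DIV_ROUND_UP(max_bytes, 4096) = 2049 PRP entries
	npages    = DIV_ROUND_UP(8 * 2049, 4096 - 8)
	          = DIV_ROUND_UP(16392, 4088)     = 5

So at most five PRP-list pages are ever needed per request, which is what the new BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS) in nvme_init() asserts at compile time.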
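The nvme-tcp timeout handler now resolves a symbolic name for the timed-out command through the new nvme_opcode_str() helper, which dispatches between the fabrics, admin, and I/O opcode tables. A small, hypothetical helper showing the same pattern for any transport; the function name and its qid argument are illustrative, not taken from the diff:

/* Hypothetical: log a timed-out request with a human-readable opcode name. */
static void nvme_log_timeout(struct nvme_ctrl *ctrl, struct request *rq, int qid)
{
	struct nvme_command *cmd = nvme_req(rq)->cmd;
	u8 opc = cmd->common.opcode;
	u8 fctype = cmd->fabrics.fctype;

	dev_warn(ctrl->device,
		 "queue %d: timeout cid %#x opcode %#x (%s)\n",
		 qid, nvme_cid(rq), opc, nvme_opcode_str(qid, opc, fctype));
}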
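The bulk of the s390 dasd changes replace (__u32)(addr_t) and __pa() casts with explicit virt_to_phys()/phys_to_virt() when storing or reading ccw->cda, the data address of a channel command word. A minimal sketch of the pattern, assuming a GFP_DMA buffer that the channel subsystem can address physically (field names follow the diff):

	struct ccw1 *ccw = cqr->cpaddr;

	ccw->cmd_code = DASD_ECKD_CCW_RCD;
	ccw->flags = 0;
	ccw->count = DASD_ECKD_RCD_DATA_SIZE;
	/* channel programs address memory physically, not virtually */
	ccw->cda = (__u32)virt_to_phys(rcd_buffer);

	/* ... and the inverse when walking a completed channel program: */
	rcd_buffer = phys_to_virt(ccw->cda);

The generated code is unchanged on s390 today; the explicit conversions simply make the virtual-to-physical boundary visible instead of relying on bare casts.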