summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bdev.c12
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/blk-core.c5
-rw-r--r--block/blk-flush.c12
-rw-r--r--block/blk-mq.c36
-rw-r--r--block/blk-mq.h2
-rw-r--r--block/blk-sysfs.c10
-rw-r--r--block/blk.h2
-rw-r--r--block/elevator.c10
-rw-r--r--block/fops.c4
-rw-r--r--block/genhd.c2
-rw-r--r--block/ioprio.c12
12 files changed, 74 insertions, 42 deletions
diff --git a/block/bdev.c b/block/bdev.c
index b4dab2fb6a74..b1d087e5e205 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -753,8 +753,7 @@ struct block_device *blkdev_get_no_open(dev_t dev)
if (!bdev)
return NULL;
- if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
- !try_module_get(bdev->bd_disk->fops->owner)) {
+ if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN)) {
put_device(&bdev->bd_device);
return NULL;
}
@@ -764,7 +763,6 @@ struct block_device *blkdev_get_no_open(dev_t dev)
void blkdev_put_no_open(struct block_device *bdev)
{
- module_put(bdev->bd_disk->fops->owner);
put_device(&bdev->bd_device);
}
@@ -820,12 +818,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
ret = -ENXIO;
if (!disk_live(disk))
goto abort_claiming;
+ if (!try_module_get(disk->fops->owner))
+ goto abort_claiming;
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
else
ret = blkdev_get_whole(bdev, mode);
if (ret)
- goto abort_claiming;
+ goto put_module;
if (mode & FMODE_EXCL) {
bd_finish_claiming(bdev, holder);
@@ -847,7 +847,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
if (unblock_events)
disk_unblock_events(disk);
return bdev;
-
+put_module:
+ module_put(disk->fops->owner);
abort_claiming:
if (mode & FMODE_EXCL)
bd_abort_claiming(bdev, holder);
@@ -956,6 +957,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
blkdev_put_whole(bdev, mode);
mutex_unlock(&disk->open_mutex);
+ module_put(disk->fops->owner);
blkdev_put_no_open(bdev);
}
EXPORT_SYMBOL(blkdev_put);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 88b1fce90520..663aabfeba18 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -640,7 +640,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
*/
ret = blk_queue_enter(q, 0);
if (ret)
- return ret;
+ goto fail;
rcu_read_lock();
spin_lock_irq(&q->queue_lock);
@@ -676,13 +676,13 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
if (unlikely(!new_blkg)) {
ret = -ENOMEM;
- goto fail;
+ goto fail_exit_queue;
}
if (radix_tree_preload(GFP_KERNEL)) {
blkg_free(new_blkg);
ret = -ENOMEM;
- goto fail;
+ goto fail_exit_queue;
}
rcu_read_lock();
@@ -722,9 +722,10 @@ fail_preloaded:
fail_unlock:
spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
+fail_exit_queue:
+ blk_queue_exit(q);
fail:
blkdev_put_no_open(bdev);
- blk_queue_exit(q);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
diff --git a/block/blk-core.c b/block/blk-core.c
index 9ee32f85d74e..1378d084c770 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -363,8 +363,10 @@ void blk_cleanup_queue(struct request_queue *q)
blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
blk_sync_queue(q);
- if (queue_is_mq(q))
+ if (queue_is_mq(q)) {
+ blk_mq_cancel_work_sync(q);
blk_mq_exit_queue(q);
+ }
/*
* In theory, request pool of sched_tags belongs to request queue.
@@ -1015,6 +1017,7 @@ EXPORT_SYMBOL(submit_bio);
/**
* bio_poll - poll for BIO completions
* @bio: bio to poll for
+ * @iob: batches of IO
* @flags: BLK_POLL_* flags that control the behavior
*
* Poll for completions on queue associated with the bio. Returns number of
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 8e364bda5166..1fce6d16e6d3 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -379,7 +379,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
* @rq is being submitted. Analyze what needs to be done and put it on the
* right queue.
*/
-bool blk_insert_flush(struct request *rq)
+void blk_insert_flush(struct request *rq)
{
struct request_queue *q = rq->q;
unsigned long fflags = q->queue_flags; /* may change, cache */
@@ -409,7 +409,7 @@ bool blk_insert_flush(struct request *rq)
*/
if (!policy) {
blk_mq_end_request(rq, 0);
- return true;
+ return;
}
BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
@@ -420,8 +420,10 @@ bool blk_insert_flush(struct request *rq)
* for normal execution.
*/
if ((policy & REQ_FSEQ_DATA) &&
- !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH)))
- return false;
+ !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
+ blk_mq_request_bypass_insert(rq, false, true);
+ return;
+ }
/*
* @rq should go through flush machinery. Mark it part of flush
@@ -437,8 +439,6 @@ bool blk_insert_flush(struct request *rq)
spin_lock_irq(&fq->mq_flush_lock);
blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
spin_unlock_irq(&fq->mq_flush_lock);
-
- return true;
}
/**
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ab34c4f20da..8874a63ae952 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -860,13 +860,14 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
if (iob->need_ts)
__blk_mq_end_request_acct(rq, now);
+ rq_qos_done(rq->q, rq);
+
WRITE_ONCE(rq->state, MQ_RQ_IDLE);
if (!refcount_dec_and_test(&rq->ref))
continue;
blk_crypto_free_request(rq);
blk_pm_mark_last_busy(rq);
- rq_qos_done(rq->q, rq);
if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) {
if (cur_hctx)
@@ -2543,8 +2544,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
return NULL;
}
-static inline bool blk_mq_can_use_cached_rq(struct request *rq,
- struct bio *bio)
+static inline bool blk_mq_can_use_cached_rq(struct request *rq, struct bio *bio)
{
if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
return false;
@@ -2565,7 +2565,6 @@ static inline struct request *blk_mq_get_request(struct request_queue *q,
bool checked = false;
if (plug) {
-
rq = rq_list_peek(&plug->cached_rq);
if (rq && rq->q == q) {
if (unlikely(!submit_bio_checks(bio)))
@@ -2587,12 +2586,14 @@ static inline struct request *blk_mq_get_request(struct request_queue *q,
fallback:
if (unlikely(bio_queue_enter(bio)))
return NULL;
- if (!checked && !submit_bio_checks(bio))
- return NULL;
+ if (unlikely(!checked && !submit_bio_checks(bio)))
+ goto out_put;
rq = blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
- if (!rq)
- blk_queue_exit(q);
- return rq;
+ if (rq)
+ return rq;
+out_put:
+ blk_queue_exit(q);
+ return NULL;
}
/**
@@ -2647,8 +2648,10 @@ void blk_mq_submit_bio(struct bio *bio)
return;
}
- if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq))
+ if (op_is_flush(bio->bi_opf)) {
+ blk_insert_flush(rq);
return;
+ }
if (plug && (q->nr_hw_queues == 1 ||
blk_mq_is_shared_tags(rq->mq_hctx->flags) ||
@@ -4417,6 +4420,19 @@ unsigned int blk_mq_rq_cpu(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_rq_cpu);
+void blk_mq_cancel_work_sync(struct request_queue *q)
+{
+ if (queue_is_mq(q)) {
+ struct blk_mq_hw_ctx *hctx;
+ int i;
+
+ cancel_delayed_work_sync(&q->requeue_work);
+
+ queue_for_each_hw_ctx(q, hctx, i)
+ cancel_delayed_work_sync(&hctx->run_work);
+ }
+}
+
static int __init blk_mq_init(void)
{
int i;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 8acfa650f575..afcf9931a489 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,6 +128,8 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
void blk_mq_free_plug_rqs(struct blk_plug *plug);
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
+void blk_mq_cancel_work_sync(struct request_queue *q);
+
void blk_mq_release(struct request_queue *q);
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cef1f713370b..cd75b0f73dc6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -791,16 +791,6 @@ static void blk_release_queue(struct kobject *kobj)
blk_free_queue_stats(q->stats);
- if (queue_is_mq(q)) {
- struct blk_mq_hw_ctx *hctx;
- int i;
-
- cancel_delayed_work_sync(&q->requeue_work);
-
- queue_for_each_hw_ctx(q, hctx, i)
- cancel_delayed_work_sync(&hctx->run_work);
- }
-
blk_exit_queue(q);
blk_queue_free_zone_bitmaps(q);
diff --git a/block/blk.h b/block/blk.h
index b4fed2033e48..ccde6e6f1736 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -271,7 +271,7 @@ void __blk_account_io_done(struct request *req, u64 now);
*/
#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
-bool blk_insert_flush(struct request *rq);
+void blk_insert_flush(struct request *rq);
int elevator_switch_mq(struct request_queue *q,
struct elevator_type *new_e);
diff --git a/block/elevator.c b/block/elevator.c
index 1f39f6e8ebb9..19a78d5516ba 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -694,12 +694,18 @@ void elevator_init_mq(struct request_queue *q)
if (!e)
return;
+ /*
+ * We are called before adding disk, when there isn't any FS I/O,
+ * so freezing queue plus canceling dispatch work is enough to
+ * drain any dispatch activities originated from passthrough
+ * requests, then no need to quiesce queue which may add long boot
+ * latency, especially when lots of disks are involved.
+ */
blk_mq_freeze_queue(q);
- blk_mq_quiesce_queue(q);
+ blk_mq_cancel_work_sync(q);
err = blk_mq_init_sched(q, e);
- blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
if (err) {
diff --git a/block/fops.c b/block/fops.c
index ad732a36f9b3..0da147edbd18 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -15,6 +15,7 @@
#include <linux/falloc.h>
#include <linux/suspend.h>
#include <linux/fs.h>
+#include <linux/module.h>
#include "blk.h"
static inline struct inode *bdev_file_inode(struct file *file)
@@ -340,8 +341,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
} else {
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
+ bio_put(bio);
return ret;
}
}
diff --git a/block/genhd.c b/block/genhd.c
index c5392cc24d37..30362aeacac4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1111,6 +1111,8 @@ static void disk_release(struct device *dev)
might_sleep();
WARN_ON_ONCE(disk_live(disk));
+ blk_mq_cancel_work_sync(disk->queue);
+
disk_release_events(disk);
kfree(disk->random);
xa_destroy(&disk->part_tbl);
diff --git a/block/ioprio.c b/block/ioprio.c
index 0e4ff245f2bf..6f01d35a5145 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -69,7 +69,14 @@ int ioprio_check_cap(int ioprio)
switch (class) {
case IOPRIO_CLASS_RT:
- if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN))
+ /*
+ * Originally this only checked for CAP_SYS_ADMIN,
+ * which was implicitly allowed for pid 0 by security
+ * modules such as SELinux. Make sure we check
+ * CAP_SYS_ADMIN first to avoid a denial/avc for
+ * possibly missing CAP_SYS_NICE permission.
+ */
+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
return -EPERM;
fallthrough;
/* rt has prio field too */
@@ -213,6 +220,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
pgrp = task_pgrp(current);
else
pgrp = find_vpid(who);
+ read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
tmpio = get_task_ioprio(p);
if (tmpio < 0)
@@ -222,6 +230,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
else
ret = ioprio_best(ret, tmpio);
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+ read_unlock(&tasklist_lock);
+
break;
case IOPRIO_WHO_USER:
uid = make_kuid(current_user_ns(), who);