author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-14 10:23:25 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-14 10:23:25 -0700
commit | 73ba2fb33c492916853dfe63e3b3163da0be661d (patch)
tree | c2fda8ca1273744d2e884d24189a15ac1a7d63c2 /block/blk-core.c
parent | 958f338e96f874a0d29442396d6adf9c1e17aa2d (diff)
parent | b86d865cb1cae1e61527ea0b8977078bbf694328 (diff)
Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"First pull request for this merge window, there will also be a
followup request with some stragglers.
This pull request contains:
- Fix for a thundering herd issue in the wbt block code (Anchal
Agarwal)
- A few NVMe pull requests:
* Improved tracepoints (Keith)
* Larger inline data support for RDMA (Steve Wise)
* RDMA setup/teardown fixes (Sagi)
* Effects log support for NVMe target (Chaitanya Kulkarni)
* Buffered IO support for NVMe target (Chaitanya Kulkarni)
* TP4004 (ANA) support (Christoph)
* Various NVMe fixes
- Block io-latency controller support. Much needed support for
properly containing block devices. (Josef)
- Series improving how we handle sense information on the stack
(Kees)
- Lightnvm fixes and updates/improvements (Mathias/Javier et al)
- Zoned device support for null_blk (Matias)
- AIX partition fixes (Mauricio Faria de Oliveira)
- DIF checksum code made generic (Max Gurtovoy)
- Add support for discard in iostats (Michael Callahan / Tejun)
- Set of updates for BFQ (Paolo)
- Removal of async write support for bsg (Christoph)
- Bio page dirtying and clone fixups (Christoph)
- Set of bcache fix/changes (via Coly)
- Series improving blk-mq queue setup/teardown speed (Ming)
- Series improving merging performance on blk-mq (Ming)
- Lots of other fixes and cleanups from a slew of folks"
* tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits)
blkcg: Make blkg_root_lookup() work for queues in bypass mode
bcache: fix error setting writeback_rate through sysfs interface
null_blk: add lock drop/acquire annotation
Blk-throttle: reduce tail io latency when iops limit is enforced
block: paride: pd: mark expected switch fall-throughs
block: Ensure that a request queue is dissociated from the cgroup controller
block: Introduce blk_exit_queue()
blkcg: Introduce blkg_root_lookup()
block: Remove two superfluous #include directives
blk-mq: count the hctx as active before allocating tag
block: bvec_nr_vecs() returns value for wrong slab
bcache: trivial - remove tailing backslash in macro BTREE_FLAG
bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section
bcache: set max writeback rate when I/O request is idle
bcache: add code comments for bset.c
bcache: fix mistaken comments in request.c
bcache: fix mistaken code comments in bcache.h
bcache: add a comment in super.c
bcache: avoid unncessary cache prefetch bch_btree_node_get()
bcache: display rate debug parameters to 0 when writeback is not running
...
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 106
1 file changed, 61 insertions, 45 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index ee33590f54eb..12550340418d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -42,7 +42,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-sched.h"
-#include "blk-wbt.h"
+#include "blk-rq-qos.h"
 
 #ifdef CONFIG_DEBUG_FS
 struct dentry *blk_debugfs_root;
@@ -715,6 +715,35 @@ void blk_set_queue_dying(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
+/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
+void blk_exit_queue(struct request_queue *q)
+{
+	/*
+	 * Since the I/O scheduler exit code may access cgroup information,
+	 * perform I/O scheduler exit before disassociating from the block
+	 * cgroup controller.
+	 */
+	if (q->elevator) {
+		ioc_clear_queue(q);
+		elevator_exit(q, q->elevator);
+		q->elevator = NULL;
+	}
+
+	/*
+	 * Remove all references to @q from the block cgroup controller before
+	 * restoring @q->queue_lock to avoid that restoring this pointer causes
+	 * e.g. blkcg_print_blkgs() to crash.
+	 */
+	blkcg_exit_queue(q);
+
+	/*
+	 * Since the cgroup code may dereference the @q->backing_dev_info
+	 * pointer, only decrease its reference count after having removed the
+	 * association with the block cgroup controller.
+	 */
+	bdi_put(q->backing_dev_info);
+}
+
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -762,9 +791,13 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * make sure all in-progress dispatch are completed because
 	 * blk_freeze_queue() can only complete all requests, and
 	 * dispatch may still be in-progress since we dispatch requests
-	 * from more than one contexts
+	 * from more than one contexts.
+	 *
+	 * No need to quiesce queue if it isn't initialized yet since
+	 * blk_freeze_queue() should be enough for cases of passthrough
+	 * request.
 	 */
-	if (q->mq_ops)
+	if (q->mq_ops && blk_queue_init_done(q))
 		blk_mq_quiesce_queue(q);
 
 	/* for synchronous bio-based driver finish in-flight integrity i/o */
@@ -780,30 +813,7 @@ void blk_cleanup_queue(struct request_queue *q)
 	 */
 	WARN_ON_ONCE(q->kobj.state_in_sysfs);
 
-	/*
-	 * Since the I/O scheduler exit code may access cgroup information,
-	 * perform I/O scheduler exit before disassociating from the block
-	 * cgroup controller.
-	 */
-	if (q->elevator) {
-		ioc_clear_queue(q);
-		elevator_exit(q, q->elevator);
-		q->elevator = NULL;
-	}
-
-	/*
-	 * Remove all references to @q from the block cgroup controller before
-	 * restoring @q->queue_lock to avoid that restoring this pointer causes
-	 * e.g. blkcg_print_blkgs() to crash.
-	 */
-	blkcg_exit_queue(q);
-
-	/*
-	 * Since the cgroup code may dereference the @q->backing_dev_info
-	 * pointer, only decrease its reference count after having removed the
-	 * association with the block cgroup controller.
-	 */
-	bdi_put(q->backing_dev_info);
+	blk_exit_queue(q);
 
 	if (q->mq_ops)
 		blk_mq_free_queue(q);
@@ -1180,6 +1190,7 @@ out_exit_flush_rq:
 		q->exit_rq_fn(q, q->fq->flush_rq);
 out_free_flush_queue:
 	blk_free_flush_queue(q->fq);
+	q->fq = NULL;
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1641,7 +1652,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, rq);
+	rq_qos_requeue(q, rq);
 
 	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
@@ -1748,7 +1759,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 
-	wbt_done(q->rq_wb, req);
+	rq_qos_done(q, req);
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
@@ -1982,7 +1993,6 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	int where = ELEVATOR_INSERT_SORT;
 	struct request *req, *free;
 	unsigned int request_count = 0;
-	unsigned int wb_acct;
 
 	/*
 	 * low level driver can indicate that it wants pages above a
@@ -2040,7 +2050,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 get_rq:
-	wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+	rq_qos_throttle(q, bio, q->queue_lock);
 
 	/*
 	 * Grab a free request. This is might sleep but can not fail.
@@ -2050,7 +2060,7 @@ get_rq:
 	req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
 	if (IS_ERR(req)) {
 		blk_queue_exit(q);
-		__wbt_done(q->rq_wb, wb_acct);
+		rq_qos_cleanup(q, bio);
 		if (PTR_ERR(req) == -ENOMEM)
 			bio->bi_status = BLK_STS_RESOURCE;
 		else
@@ -2059,7 +2069,7 @@ get_rq:
 		goto out_unlock;
 	}
 
-	wbt_track(req, wb_acct);
+	rq_qos_track(q, req, bio);
 
 	/*
 	 * After dropping the lock and possibly sleeping here, our request
@@ -2700,13 +2710,13 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (blk_do_io_stat(req)) {
-		const int rw = rq_data_dir(req);
+		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
 		cpu = part_stat_lock();
 		part = req->part;
-		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+		part_stat_add(cpu, part, sectors[sgrp], bytes >> 9);
 		part_stat_unlock();
 	}
 }
@@ -2720,7 +2730,7 @@ void blk_account_io_done(struct request *req, u64 now)
 	 */
 	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
 		unsigned long duration;
-		const int rw = rq_data_dir(req);
+		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
@@ -2728,10 +2738,10 @@ void blk_account_io_done(struct request *req, u64 now)
 		cpu = part_stat_lock();
 		part = req->part;
 
-		part_stat_inc(cpu, part, ios[rw]);
-		part_stat_add(cpu, part, ticks[rw], duration);
+		part_stat_inc(cpu, part, ios[sgrp]);
+		part_stat_add(cpu, part, ticks[sgrp], duration);
 		part_round_stats(req->q, cpu, part);
-		part_dec_in_flight(req->q, part, rw);
+		part_dec_in_flight(req->q, part, rq_data_dir(req));
 
 		hd_struct_put(part);
 		part_stat_unlock();
@@ -2751,9 +2761,9 @@ static bool blk_pm_allow_request(struct request *rq)
 		return rq->rq_flags & RQF_PM;
 	case RPM_SUSPENDED:
 		return false;
+	default:
+		return true;
 	}
-
-	return true;
 }
 #else
 static bool blk_pm_allow_request(struct request *rq)
@@ -2980,7 +2990,7 @@ void blk_start_request(struct request *req)
 		req->throtl_size = blk_rq_sectors(req);
 #endif
 		req->rq_flags |= RQF_STATS;
-		wbt_issue(req->q->rq_wb, req);
+		rq_qos_issue(req->q, req);
 	}
 
 	BUG_ON(blk_rq_is_complete(req));
@@ -3053,6 +3063,10 @@ EXPORT_SYMBOL_GPL(blk_steal_bios);
  *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
  *     %false return from this function.
  *
+ * Note:
+ *	The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
+ *	blk_rq_bytes() and in blk_update_request().
+ *
  * Return:
  *     %false - this request doesn't have any more data
  *     %true  - this request has more data
@@ -3200,7 +3214,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
 	blk_account_io_done(req, now);
 
 	if (req->end_io) {
-		wbt_done(req->q->rq_wb, req);
+		rq_qos_done(q, req);
 		req->end_io(req, error);
 	} else {
 		if (blk_bidi_rq(req))
@@ -3763,9 +3777,11 @@ EXPORT_SYMBOL(blk_finish_plug);
  */
 void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
 {
-	/* not support for RQF_PM and ->rpm_status in blk-mq yet */
-	if (q->mq_ops)
+	/* Don't enable runtime PM for blk-mq until it is ready */
+	if (q->mq_ops) {
+		pm_runtime_disable(dev);
 		return;
+	}
 
 	q->dev = dev;
 	q->rpm_status = RPM_ACTIVE;
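A note on the recurring pattern in the diff above: the direct wbt_*() calls (writeback throttling) are replaced by rq_qos_*() hooks, which lets the new io-latency controller and wbt register against the same request queue. The following is a minimal, self-contained user-space sketch of that dispatch idea; the struct layout, field names, and hooks here are simplified illustrations, not the kernel's exact blk-rq-qos.h definitions.

/*
 * Sketch of a per-queue chain of QoS policies, each providing optional
 * callbacks that the block core invokes at fixed points in a request's life.
 * Simplified and hypothetical types for illustration only.
 */
#include <stdio.h>

struct request;                          /* opaque in this sketch */

struct rq_qos;

struct rq_qos_ops {
	void (*issue)(struct rq_qos *rqos, struct request *rq);
	void (*done)(struct rq_qos *rqos, struct request *rq);
};

struct rq_qos {
	const struct rq_qos_ops *ops;
	struct rq_qos *next;             /* singly linked per-queue chain */
	const char *name;
};

struct request_queue {
	struct rq_qos *rq_qos;           /* head of the policy chain */
};

/* Walk the chain and invoke the hook on every policy that implements it. */
static void rq_qos_done(struct request_queue *q, struct request *rq)
{
	struct rq_qos *rqos;

	for (rqos = q->rq_qos; rqos; rqos = rqos->next)
		if (rqos->ops->done)
			rqos->ops->done(rqos, rq);
}

/* One example policy: a stand-in for writeback throttling's completion hook. */
static void wbt_done_hook(struct rq_qos *rqos, struct request *rq)
{
	(void)rq;
	printf("%s: request completed\n", rqos->name);
}

static const struct rq_qos_ops wbt_ops = { .done = wbt_done_hook };

int main(void)
{
	struct rq_qos wbt = { .ops = &wbt_ops, .name = "wbt" };
	struct request_queue q = { .rq_qos = &wbt };

	/* Completing a request notifies every registered policy in turn. */
	rq_qos_done(&q, NULL);
	return 0;
}

The design gain visible in the diff is that blk-core.c no longer needs to know which throttling policy is active: one call site per event (throttle, track, issue, requeue, done, cleanup) fans out to whatever policies are attached to the queue.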