author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-14 10:23:25 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-14 10:23:25 -0700
commit | 73ba2fb33c492916853dfe63e3b3163da0be661d (patch)
tree | c2fda8ca1273744d2e884d24189a15ac1a7d63c2 /block/blk-core.c
parent | 958f338e96f874a0d29442396d6adf9c1e17aa2d (diff)
parent | b86d865cb1cae1e61527ea0b8977078bbf694328 (diff)
Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"First pull request for this merge window, there will also be a
followup request with some stragglers.
This pull request contains:
- Fix for a thundering herd issue in the wbt block code (Anchal
Agarwal)
- A few NVMe pull requests:
* Improved tracepoints (Keith)
* Larger inline data support for RDMA (Steve Wise)
* RDMA setup/teardown fixes (Sagi)
* Effects log support for NVMe target (Chaitanya Kulkarni)
* Buffered IO support for NVMe target (Chaitanya Kulkarni)
* TP4004 (ANA) support (Christoph)
* Various NVMe fixes
- Block io-latency controller support. Much needed support for
properly containing block devices. (Josef)
- Series improving how we handle sense information on the stack
(Kees)
- Lightnvm fixes and updates/improvements (Mathias/Javier et al)
- Zoned device support for null_blk (Matias)
- AIX partition fixes (Mauricio Faria de Oliveira)
- DIF checksum code made generic (Max Gurtovoy)
- Add support for discard in iostats (Michael Callahan / Tejun)
- Set of updates for BFQ (Paolo)
- Removal of async write support for bsg (Christoph)
- Bio page dirtying and clone fixups (Christoph)
- Set of bcache fix/changes (via Coly)
- Series improving blk-mq queue setup/teardown speed (Ming)
- Series improving merging performance on blk-mq (Ming)
- Lots of other fixes and cleanups from a slew of folks"
* tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits)
blkcg: Make blkg_root_lookup() work for queues in bypass mode
bcache: fix error setting writeback_rate through sysfs interface
null_blk: add lock drop/acquire annotation
Blk-throttle: reduce tail io latency when iops limit is enforced
block: paride: pd: mark expected switch fall-throughs
block: Ensure that a request queue is dissociated from the cgroup controller
block: Introduce blk_exit_queue()
blkcg: Introduce blkg_root_lookup()
block: Remove two superfluous #include directives
blk-mq: count the hctx as active before allocating tag
block: bvec_nr_vecs() returns value for wrong slab
bcache: trivial - remove tailing backslash in macro BTREE_FLAG
bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section
bcache: set max writeback rate when I/O request is idle
bcache: add code comments for bset.c
bcache: fix mistaken comments in request.c
bcache: fix mistaken code comments in bcache.h
bcache: add a comment in super.c
bcache: avoid unncessary cache prefetch bch_btree_node_get()
bcache: display rate debug parameters to 0 when writeback is not running
...
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 106
1 file changed, 61 insertions, 45 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index ee33590f54eb..12550340418d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -42,7 +42,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-sched.h"
-#include "blk-wbt.h"
+#include "blk-rq-qos.h"
 
 #ifdef CONFIG_DEBUG_FS
 struct dentry *blk_debugfs_root;
@@ -715,6 +715,35 @@ void blk_set_queue_dying(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
+/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
+void blk_exit_queue(struct request_queue *q)
+{
+	/*
+	 * Since the I/O scheduler exit code may access cgroup information,
+	 * perform I/O scheduler exit before disassociating from the block
+	 * cgroup controller.
+	 */
+	if (q->elevator) {
+		ioc_clear_queue(q);
+		elevator_exit(q, q->elevator);
+		q->elevator = NULL;
+	}
+
+	/*
+	 * Remove all references to @q from the block cgroup controller before
+	 * restoring @q->queue_lock to avoid that restoring this pointer causes
+	 * e.g. blkcg_print_blkgs() to crash.
+	 */
+	blkcg_exit_queue(q);
+
+	/*
+	 * Since the cgroup code may dereference the @q->backing_dev_info
+	 * pointer, only decrease its reference count after having removed the
+	 * association with the block cgroup controller.
+	 */
+	bdi_put(q->backing_dev_info);
+}
+
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -762,9 +791,13 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * make sure all in-progress dispatch are completed because
 	 * blk_freeze_queue() can only complete all requests, and
 	 * dispatch may still be in-progress since we dispatch requests
-	 * from more than one contexts
+	 * from more than one contexts.
+	 *
+	 * No need to quiesce queue if it isn't initialized yet since
+	 * blk_freeze_queue() should be enough for cases of passthrough
+	 * request.
 	 */
-	if (q->mq_ops)
+	if (q->mq_ops && blk_queue_init_done(q))
 		blk_mq_quiesce_queue(q);
 
 	/* for synchronous bio-based driver finish in-flight integrity i/o */
@@ -780,30 +813,7 @@ void blk_cleanup_queue(struct request_queue *q)
 	 */
 	WARN_ON_ONCE(q->kobj.state_in_sysfs);
 
-	/*
-	 * Since the I/O scheduler exit code may access cgroup information,
-	 * perform I/O scheduler exit before disassociating from the block
-	 * cgroup controller.
-	 */
-	if (q->elevator) {
-		ioc_clear_queue(q);
-		elevator_exit(q, q->elevator);
-		q->elevator = NULL;
-	}
-
-	/*
-	 * Remove all references to @q from the block cgroup controller before
-	 * restoring @q->queue_lock to avoid that restoring this pointer causes
-	 * e.g. blkcg_print_blkgs() to crash.
-	 */
-	blkcg_exit_queue(q);
-
-	/*
-	 * Since the cgroup code may dereference the @q->backing_dev_info
-	 * pointer, only decrease its reference count after having removed the
-	 * association with the block cgroup controller.
-	 */
-	bdi_put(q->backing_dev_info);
+	blk_exit_queue(q);
 
 	if (q->mq_ops)
 		blk_mq_free_queue(q);
@@ -1180,6 +1190,7 @@ out_exit_flush_rq:
 		q->exit_rq_fn(q, q->fq->flush_rq);
 out_free_flush_queue:
 	blk_free_flush_queue(q->fq);
+	q->fq = NULL;
 	return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1641,7 +1652,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, rq);
+	rq_qos_requeue(q, rq);
 
 	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
@@ -1748,7 +1759,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 
-	wbt_done(q->rq_wb, req);
+	rq_qos_done(q, req);
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
@@ -1982,7 +1993,6 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	int where = ELEVATOR_INSERT_SORT;
 	struct request *req, *free;
 	unsigned int request_count = 0;
-	unsigned int wb_acct;
 
 	/*
 	 * low level driver can indicate that it wants pages above a
@@ -2040,7 +2050,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 get_rq:
-	wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+	rq_qos_throttle(q, bio, q->queue_lock);
 
 	/*
 	 * Grab a free request. This is might sleep but can not fail.
@@ -2050,7 +2060,7 @@ get_rq:
 	req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
 	if (IS_ERR(req)) {
 		blk_queue_exit(q);
-		__wbt_done(q->rq_wb, wb_acct);
+		rq_qos_cleanup(q, bio);
 		if (PTR_ERR(req) == -ENOMEM)
 			bio->bi_status = BLK_STS_RESOURCE;
 		else
@@ -2059,7 +2069,7 @@ get_rq:
 		goto out_unlock;
 	}
 
-	wbt_track(req, wb_acct);
+	rq_qos_track(q, req, bio);
 
 	/*
 	 * After dropping the lock and possibly sleeping here, our request
@@ -2700,13 +2710,13 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (blk_do_io_stat(req)) {
-		const int rw = rq_data_dir(req);
+		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
 		cpu = part_stat_lock();
 		part = req->part;
-		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+		part_stat_add(cpu, part, sectors[sgrp], bytes >> 9);
 		part_stat_unlock();
 	}
 }
@@ -2720,7 +2730,7 @@ void blk_account_io_done(struct request *req, u64 now)
 	 */
 	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
 		unsigned long duration;
-		const int rw = rq_data_dir(req);
+		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
@@ -2728,10 +2738,10 @@ void blk_account_io_done(struct request *req, u64 now)
 		cpu = part_stat_lock();
 		part = req->part;
 
-		part_stat_inc(cpu, part, ios[rw]);
-		part_stat_add(cpu, part, ticks[rw], duration);
+		part_stat_inc(cpu, part, ios[sgrp]);
+		part_stat_add(cpu, part, ticks[sgrp], duration);
 		part_round_stats(req->q, cpu, part);
-		part_dec_in_flight(req->q, part, rw);
+		part_dec_in_flight(req->q, part, rq_data_dir(req));
 
 		hd_struct_put(part);
 		part_stat_unlock();
@@ -2751,9 +2761,9 @@ static bool blk_pm_allow_request(struct request *rq)
 		return rq->rq_flags & RQF_PM;
 	case RPM_SUSPENDED:
 		return false;
+	default:
+		return true;
 	}
-
-	return true;
 }
 #else
 static bool blk_pm_allow_request(struct request *rq)
@@ -2980,7 +2990,7 @@ void blk_start_request(struct request *req)
 		req->throtl_size = blk_rq_sectors(req);
 #endif
 		req->rq_flags |= RQF_STATS;
-		wbt_issue(req->q->rq_wb, req);
+		rq_qos_issue(req->q, req);
 	}
 
 	BUG_ON(blk_rq_is_complete(req));
@@ -3053,6 +3063,10 @@ EXPORT_SYMBOL_GPL(blk_steal_bios);
  *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
  *     %false return from this function.
  *
+ * Note:
+ *	The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
+ *	blk_rq_bytes() and in blk_update_request().
+ *
  * Return:
  *     %false - this request doesn't have any more data
  *     %true  - this request has more data
@@ -3200,7 +3214,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
 	blk_account_io_done(req, now);
 
 	if (req->end_io) {
-		wbt_done(req->q->rq_wb, req);
+		rq_qos_done(q, req);
 		req->end_io(req, error);
 	} else {
 		if (blk_bidi_rq(req))
@@ -3763,9 +3777,11 @@ EXPORT_SYMBOL(blk_finish_plug);
  */
 void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
 {
-	/* not support for RQF_PM and ->rpm_status in blk-mq yet */
-	if (q->mq_ops)
+	/* Don't enable runtime PM for blk-mq until it is ready */
+	if (q->mq_ops) {
+		pm_runtime_disable(dev);
 		return;
+	}
 
 	q->dev = dev;
 	q->rpm_status = RPM_ACTIVE;
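A note on the recurring pattern in the diff above: the direct wbt_*() calls (writeback throttling) are replaced by rq_qos_*() hooks, which lets the new io-latency controller and wbt register against the same request queue. The following is a minimal, self-contained user-space sketch of that dispatch idea; the struct layout, field names, and hooks here are simplified illustrations, not the kernel's exact blk-rq-qos.h definitions.

/*
 * Sketch of a per-queue chain of QoS policies, each providing optional
 * callbacks that the block core invokes at fixed points in a request's life.
 * Simplified and hypothetical types for illustration only.
 */
#include <stdio.h>

struct request;                          /* opaque in this sketch */

struct rq_qos;

struct rq_qos_ops {
	void (*issue)(struct rq_qos *rqos, struct request *rq);
	void (*done)(struct rq_qos *rqos, struct request *rq);
};

struct rq_qos {
	const struct rq_qos_ops *ops;
	struct rq_qos *next;             /* singly linked per-queue chain */
	const char *name;
};

struct request_queue {
	struct rq_qos *rq_qos;           /* head of the policy chain */
};

/* Walk the chain and invoke the hook on every policy that implements it. */
static void rq_qos_done(struct request_queue *q, struct request *rq)
{
	struct rq_qos *rqos;

	for (rqos = q->rq_qos; rqos; rqos = rqos->next)
		if (rqos->ops->done)
			rqos->ops->done(rqos, rq);
}

/* One example policy: a stand-in for writeback throttling's completion hook. */
static void wbt_done_hook(struct rq_qos *rqos, struct request *rq)
{
	(void)rq;
	printf("%s: request completed\n", rqos->name);
}

static const struct rq_qos_ops wbt_ops = { .done = wbt_done_hook };

int main(void)
{
	struct rq_qos wbt = { .ops = &wbt_ops, .name = "wbt" };
	struct request_queue q = { .rq_qos = &wbt };

	/* Completing a request notifies every registered policy in turn. */
	rq_qos_done(&q, NULL);
	return 0;
}

The design gain visible in the diff is that blk-core.c no longer needs to know which throttling policy is active: one call site per event (throttle, track, issue, requeue, done, cleanup) fans out to whatever policies are attached to the queue.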