summaryrefslogtreecommitdiff
path: root/block/blk-mq-sched.c
diff options
context:
space:
mode:
authorMing Lei <ming.lei@redhat.com>2017-10-14 17:22:30 +0800
committerJens Axboe <axboe@kernel.dk>2017-11-01 08:20:02 -0600
commitb347689ffbca745ac457ee27400ce1affd571c6f (patch)
tree44edb581ccc8c51915efa957300f9814b7044dc2 /block/blk-mq-sched.c
parentde1482974080ec9ef414bf048b2646b246b63f6e (diff)
blk-mq-sched: improve dispatching from sw queue
SCSI devices use host-wide tagset, and the shared driver tag space is often quite big. However, there is also a queue depth for each lun( .cmd_per_lun), which is often small, for example, on both lpfc and qla2xxx, .cmd_per_lun is just 3. So lots of requests may stay in sw queue, and we always flush all belonging to same hw queue and dispatch them all to driver. Unfortunately it is easy to cause queue busy because of the small .cmd_per_lun. Once these requests are flushed out, they have to stay in hctx->dispatch, and no bio merge can happen on these requests, and sequential IO performance is harmed. This patch introduces blk_mq_dequeue_from_ctx for dequeuing a request from a sw queue, so that we can dispatch them in scheduler's way. We can then avoid dequeueing too many requests from sw queue, since we don't flush ->dispatch completely. This patch improves dispatching from sw queue by using the .get_budget and .put_budget callbacks. Reviewed-by: Omar Sandoval <osandov@fb.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r--block/blk-mq-sched.c74
1 files changed, 71 insertions, 3 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 8e525e66a0d9..df8581bb0a37 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -128,6 +128,61 @@ static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
return false;
}
+static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
+ struct blk_mq_ctx *ctx)
+{
+ unsigned idx = ctx->index_hw;
+
+ if (++idx == hctx->nr_ctx)
+ idx = 0;
+
+ return hctx->ctxs[idx];
+}
+
+/* return true if hctx need to run again */
+static bool blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
+{
+ struct request_queue *q = hctx->queue;
+ LIST_HEAD(rq_list);
+ struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
+
+ do {
+ struct request *rq;
+ blk_status_t ret;
+
+ if (!sbitmap_any_bit_set(&hctx->ctx_map))
+ break;
+
+ ret = blk_mq_get_dispatch_budget(hctx);
+ if (ret == BLK_STS_RESOURCE)
+ return true;
+
+ rq = blk_mq_dequeue_from_ctx(hctx, ctx);
+ if (!rq) {
+ blk_mq_put_dispatch_budget(hctx);
+ break;
+ } else if (ret != BLK_STS_OK) {
+ blk_mq_end_request(rq, ret);
+ continue;
+ }
+
+ /*
+ * Now this rq owns the budget which has to be released
+ * if this rq won't be queued to driver via .queue_rq()
+ * in blk_mq_dispatch_rq_list().
+ */
+ list_add(&rq->queuelist, &rq_list);
+
+ /* round robin for fair dispatch */
+ ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
+
+ } while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+
+ WRITE_ONCE(hctx->dispatch_from, ctx);
+
+ return false;
+}
+
/* return true if hw queue need to be run again */
bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
@@ -169,11 +224,24 @@ bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
*/
if (!list_empty(&rq_list)) {
blk_mq_sched_mark_restart_hctx(hctx);
- if (blk_mq_dispatch_rq_list(q, &rq_list, false) &&
- has_sched_dispatch)
- run_queue = blk_mq_do_dispatch_sched(hctx);
+ if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
+ if (has_sched_dispatch)
+ run_queue = blk_mq_do_dispatch_sched(hctx);
+ else
+ run_queue = blk_mq_do_dispatch_ctx(hctx);
+ }
} else if (has_sched_dispatch) {
run_queue = blk_mq_do_dispatch_sched(hctx);
+ } else if (q->mq_ops->get_budget) {
+ /*
+ * If we need to get budget before queuing request, we
+ * dequeue request one by one from sw queue for avoiding
+ * to mess up I/O merge when dispatch runs out of resource.
+ *
+ * TODO: get more budgets, and dequeue more requests in
+ * one time.
+ */
+ run_queue = blk_mq_do_dispatch_ctx(hctx);
} else {
blk_mq_flush_busy_ctxs(hctx, &rq_list);
blk_mq_dispatch_rq_list(q, &rq_list, false);