diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-mq-sched.c | 181 | ||||
-rw-r--r-- | block/blk-mq-sched.h | 25 | ||||
-rw-r--r-- | block/blk-mq.c | 114 | ||||
-rw-r--r-- | block/blk-mq.h | 2 | ||||
-rw-r--r-- | block/blk-stat.c | 4 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/elevator.c | 126 |
7 files changed, 289 insertions, 165 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 09af8ff18719..c974a1bbf4cb 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -171,7 +171,8 @@ void blk_mq_sched_put_request(struct request *rq) void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { - struct elevator_queue *e = hctx->queue->elevator; + struct request_queue *q = hctx->queue; + struct elevator_queue *e = q->elevator; const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request; bool did_work = false; LIST_HEAD(rq_list); @@ -203,10 +204,10 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) */ if (!list_empty(&rq_list)) { blk_mq_sched_mark_restart_hctx(hctx); - did_work = blk_mq_dispatch_rq_list(hctx, &rq_list); + did_work = blk_mq_dispatch_rq_list(q, &rq_list); } else if (!has_sched_dispatch) { blk_mq_flush_busy_ctxs(hctx, &rq_list); - blk_mq_dispatch_rq_list(hctx, &rq_list); + blk_mq_dispatch_rq_list(q, &rq_list); } /* @@ -222,7 +223,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) if (!rq) break; list_add(&rq->queuelist, &rq_list); - } while (blk_mq_dispatch_rq_list(hctx, &rq_list)); + } while (blk_mq_dispatch_rq_list(q, &rq_list)); } } @@ -317,25 +318,68 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return true; } -static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) { if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (blk_mq_hctx_has_pending(hctx)) + if (blk_mq_hctx_has_pending(hctx)) { blk_mq_run_hw_queue(hctx, true); + return true; + } } + return false; } -void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - unsigned int i; +/** + * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list + * @pos: loop cursor. + * @skip: the list element that will not be examined. Iteration starts at + * @skip->next. + * @head: head of the list to examine. This list must have at least one + * element, namely @skip. + * @member: name of the list_head structure within typeof(*pos). + */ +#define list_for_each_entry_rcu_rr(pos, skip, head, member) \ + for ((pos) = (skip); \ + (pos = (pos)->member.next != (head) ? list_entry_rcu( \ + (pos)->member.next, typeof(*pos), member) : \ + list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \ + (pos) != (skip); ) - if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { - if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_restart_hctx(hctx); +/* + * Called after a driver tag has been freed to check whether a hctx needs to + * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware + * queues in a round-robin fashion if the tag set of @hctx is shared with other + * hardware queues. + */ +void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) +{ + struct blk_mq_tags *const tags = hctx->tags; + struct blk_mq_tag_set *const set = hctx->queue->tag_set; + struct request_queue *const queue = hctx->queue, *q; + struct blk_mq_hw_ctx *hctx2; + unsigned int i, j; + + if (set->flags & BLK_MQ_F_TAG_SHARED) { + rcu_read_lock(); + list_for_each_entry_rcu_rr(q, queue, &set->tag_list, + tag_set_list) { + queue_for_each_hw_ctx(q, hctx2, i) + if (hctx2->tags == tags && + blk_mq_sched_restart_hctx(hctx2)) + goto done; + } + j = hctx->queue_num + 1; + for (i = 0; i < queue->nr_hw_queues; i++, j++) { + if (j == queue->nr_hw_queues) + j = 0; + hctx2 = queue->queue_hw_ctx[j]; + if (hctx2->tags == tags && + blk_mq_sched_restart_hctx(hctx2)) + break; } +done: + rcu_read_unlock(); } else { blk_mq_sched_restart_hctx(hctx); } @@ -431,11 +475,67 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, } } -int blk_mq_sched_setup(struct request_queue *q) +static int blk_mq_sched_alloc_tags(struct request_queue *q, + struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct blk_mq_tag_set *set = q->tag_set; + int ret; + + hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests, + set->reserved_tags); + if (!hctx->sched_tags) + return -ENOMEM; + + ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); + if (ret) + blk_mq_sched_free_tags(set, hctx, hctx_idx); + + return ret; +} + +static void blk_mq_sched_tags_teardown(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; - int ret, i; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_free_tags(set, hctx, i); +} + +int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct elevator_queue *e = q->elevator; + + if (!e) + return 0; + + return blk_mq_sched_alloc_tags(q, hctx, hctx_idx); +} + +void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct elevator_queue *e = q->elevator; + + if (!e) + return; + + blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx); +} + +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + int ret; + + if (!e) { + q->elevator = NULL; + return 0; + } /* * Default to 256, since we don't split into sync/async like the @@ -443,49 +543,30 @@ int blk_mq_sched_setup(struct request_queue *q) */ q->nr_requests = 2 * BLKDEV_MAX_RQ; - /* - * We're switching to using an IO scheduler, so setup the hctx - * scheduler tags and switch the request map from the regular - * tags to scheduler tags. First allocate what we need, so we - * can safely fail and fallback, if needed. - */ - ret = 0; queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_tags = blk_mq_alloc_rq_map(set, i, - q->nr_requests, set->reserved_tags); - if (!hctx->sched_tags) { - ret = -ENOMEM; - break; - } - ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests); + ret = blk_mq_sched_alloc_tags(q, hctx, i); if (ret) - break; + goto err; } - /* - * If we failed, free what we did allocate - */ - if (ret) { - queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->sched_tags) - continue; - blk_mq_sched_free_tags(set, hctx, i); - } - - return ret; - } + ret = e->ops.mq.init_sched(q, e); + if (ret) + goto err; return 0; + +err: + blk_mq_sched_tags_teardown(q); + q->elevator = NULL; + return ret; } -void blk_mq_sched_teardown(struct request_queue *q) +void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) { - struct blk_mq_tag_set *set = q->tag_set; - struct blk_mq_hw_ctx *hctx; - int i; - - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_free_tags(set, hctx, i); + if (e->type->ops.mq.exit_sched) + e->type->ops.mq.exit_sched(e); + blk_mq_sched_tags_teardown(q); + q->elevator = NULL; } int blk_mq_sched_init(struct request_queue *q) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index a75b16b123f7..3a9e6e40558b 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -19,7 +19,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, struct request **merged_request); bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); -void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx); +void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async, bool can_block); @@ -32,8 +32,13 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, struct list_head *rq_list, struct request *(*get_rq)(struct blk_mq_hw_ctx *)); -int blk_mq_sched_setup(struct request_queue *q); -void blk_mq_sched_teardown(struct request_queue *q); +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); +void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); + +int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx); +void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx); int blk_mq_sched_init(struct request_queue *q); @@ -131,20 +136,6 @@ static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } -/* - * Mark a hardware queue and the request queue it belongs to as needing a - * restart. - */ -static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) - set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); -} - static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); diff --git a/block/blk-mq.c b/block/blk-mq.c index a4546f060e80..c7836a1ded97 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -321,7 +321,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); - blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); if (!rq) @@ -349,7 +348,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) blk_mq_sched_completed_request(hctx, rq); - blk_mq_sched_restart_queues(hctx); + blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -697,17 +696,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, { struct blk_mq_timeout_data *data = priv; - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { - /* - * If a request wasn't started before the queue was - * marked dying, kill it here or it'll go unnoticed. - */ - if (unlikely(blk_queue_dying(rq->q))) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - } + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) return; - } if (time_after_eq(jiffies, rq->deadline)) { if (!blk_mark_rq_complete(rq)) @@ -855,12 +845,8 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT, }; - if (rq->tag != -1) { -done: - if (hctx) - *hctx = data.hctx; - return true; - } + if (rq->tag != -1) + goto done; if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) data.flags |= BLK_MQ_REQ_RESERVED; @@ -872,10 +858,12 @@ done: atomic_inc(&data.hctx->nr_active); } data.hctx->tags->rqs[rq->tag] = rq; - goto done; } - return false; +done: + if (hctx) + *hctx = data.hctx; + return rq->tag != -1; } static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, @@ -972,13 +960,16 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx) return true; } -bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) +bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) { - struct request_queue *q = hctx->queue; + struct blk_mq_hw_ctx *hctx; struct request *rq; LIST_HEAD(driver_list); struct list_head *dptr; - int queued, ret = BLK_MQ_RQ_QUEUE_OK; + int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; + + if (list_empty(list)) + return false; /* * Start off with dptr being NULL, so we start the first request @@ -989,8 +980,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) /* * Now process all the entries, sending them to the driver. */ - queued = 0; - while (!list_empty(list)) { + errors = queued = 0; + do { struct blk_mq_queue_data bd; rq = list_first_entry(list, struct request, queuelist); @@ -1046,6 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) default: pr_err("blk-mq: bad return on queue: %d\n", ret); case BLK_MQ_RQ_QUEUE_ERROR: + errors++; rq->errors = -EIO; blk_mq_end_request(rq, rq->errors); break; @@ -1060,7 +1052,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) */ if (!dptr && list->next != list->prev) dptr = &driver_list; - } + } while (!list_empty(list)); hctx->dispatched[queued_to_index(queued)]++; @@ -1097,7 +1089,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) blk_mq_run_hw_queue(hctx, true); } - return queued != 0; + return (queued + errors) != 0; } static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) @@ -1143,7 +1135,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) return hctx->next_cpu; } -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, + unsigned long msecs) { if (unlikely(blk_mq_hctx_stopped(hctx) || !blk_mq_hw_queue_mapped(hctx))) @@ -1160,7 +1153,24 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) put_cpu(); } - kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); + if (msecs == 0) + kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), + &hctx->run_work); + else + kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), + &hctx->delayed_run_work, + msecs_to_jiffies(msecs)); +} + +void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) +{ + __blk_mq_delay_run_hw_queue(hctx, true, msecs); +} +EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); + +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +{ + __blk_mq_delay_run_hw_queue(hctx, async, 0); } void blk_mq_run_hw_queues(struct request_queue *q, bool async) @@ -1263,6 +1273,15 @@ static void blk_mq_run_work_fn(struct work_struct *work) __blk_mq_run_hw_queue(hctx); } +static void blk_mq_delayed_run_work_fn(struct work_struct *work) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work); + + __blk_mq_run_hw_queue(hctx); +} + static void blk_mq_delay_work_fn(struct work_struct *work) { struct blk_mq_hw_ctx *hctx; @@ -1932,6 +1951,8 @@ static void blk_mq_exit_hctx(struct request_queue *q, hctx->fq->flush_rq, hctx_idx, flush_start_tag + hctx_idx); + blk_mq_sched_exit_hctx(q, hctx, hctx_idx); + if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); @@ -1968,6 +1989,7 @@ static int blk_mq_init_hctx(struct request_queue *q, node = hctx->numa_node = set->numa_node; INIT_WORK(&hctx->run_work, blk_mq_run_work_fn); + INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn); INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); @@ -1998,9 +2020,12 @@ static int blk_mq_init_hctx(struct request_queue *q, set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto free_bitmap; + if (blk_mq_sched_init_hctx(q, hctx, hctx_idx)) + goto exit_hctx; + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); if (!hctx->fq) - goto exit_hctx; + goto sched_exit_hctx; if (set->ops->init_request && set->ops->init_request(set->driver_data, @@ -2015,6 +2040,8 @@ static int blk_mq_init_hctx(struct request_queue *q, free_fq: kfree(hctx->fq); + sched_exit_hctx: + blk_mq_sched_exit_hctx(q, hctx, hctx_idx); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); @@ -2241,8 +2268,6 @@ void blk_mq_release(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned int i; - blk_mq_sched_teardown(q); - /* hctx kobj stays in hctx */ queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) @@ -2573,6 +2598,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) return 0; } +static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) +{ + if (set->ops->map_queues) + return set->ops->map_queues(set); + else + return blk_mq_map_queues(set); +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -2627,10 +2660,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (!set->mq_map) goto out_free_tags; - if (set->ops->map_queues) - ret = set->ops->map_queues(set); - else - ret = blk_mq_map_queues(set); + ret = blk_mq_update_queue_map(set); if (ret) goto out_free_mq_map; @@ -2722,6 +2752,7 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) blk_mq_freeze_queue(q); set->nr_hw_queues = nr_hw_queues; + blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); @@ -2897,8 +2928,17 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; if (!blk_qc_t_is_internal(cookie)) rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); - else + else { rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie)); + /* + * With scheduling, if the request has completed, we'll + * get a NULL return here, as we clear the sched tag when + * that happens. The request still remains valid, like always, + * so we should be safe with just the NULL check. + */ + if (!rq) + return false; + } return __blk_mq_poll(hctx, rq); } diff --git a/block/blk-mq.h b/block/blk-mq.h index b79f9a7d8cf6..660a17e1d033 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -31,7 +31,7 @@ void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); -bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); +bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx); bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, diff --git a/block/blk-stat.c b/block/blk-stat.c index 9b43efb8933f..186fcb981e9b 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -30,11 +30,11 @@ static void blk_stat_flush_batch(struct blk_rq_stat *stat) static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { + blk_stat_flush_batch(src); + if (!src->nr_samples) return; - blk_stat_flush_batch(src); - dst->min = min(dst->min, src->min); dst->max = max(dst->max, src->max); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index c44b321335f3..37f0b3ad635e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -816,7 +816,7 @@ static void blk_release_queue(struct kobject *kobj) if (q->elevator) { ioc_clear_queue(q); - elevator_exit(q->elevator); + elevator_exit(q, q->elevator); } blk_exit_rl(&q->root_rl); diff --git a/block/elevator.c b/block/elevator.c index 01139f549b5b..4d9084a14c10 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -242,26 +242,21 @@ int elevator_init(struct request_queue *q, char *name) } } - if (e->uses_mq) { - err = blk_mq_sched_setup(q); - if (!err) - err = e->ops.mq.init_sched(q, e); - } else + if (e->uses_mq) + err = blk_mq_init_sched(q, e); + else err = e->ops.sq.elevator_init_fn(q, e); - if (err) { - if (e->uses_mq) - blk_mq_sched_teardown(q); + if (err) elevator_put(e); - } return err; } EXPORT_SYMBOL(elevator_init); -void elevator_exit(struct elevator_queue *e) +void elevator_exit(struct request_queue *q, struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); if (e->uses_mq && e->type->ops.mq.exit_sched) - e->type->ops.mq.exit_sched(e); + blk_mq_exit_sched(q, e); else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) e->type->ops.sq.elevator_exit_fn(e); mutex_unlock(&e->sysfs_lock); @@ -946,6 +941,45 @@ void elv_unregister(struct elevator_type *e) } EXPORT_SYMBOL_GPL(elv_unregister); +static int elevator_switch_mq(struct request_queue *q, + struct elevator_type *new_e) +{ + int ret; + + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + + if (q->elevator) { + if (q->elevator->registered) + elv_unregister_queue(q); + ioc_clear_queue(q); + elevator_exit(q, q->elevator); + } + + ret = blk_mq_init_sched(q, new_e); + if (ret) + goto out; + + if (new_e) { + ret = elv_register_queue(q); + if (ret) { + elevator_exit(q, q->elevator); + goto out; + } + } + + if (new_e) + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); + else + blk_add_trace_msg(q, "elv switch: none"); + +out: + blk_mq_unfreeze_queue(q); + blk_mq_start_stopped_hw_queues(q, true); + return ret; + +} + /* * switch to new_e io scheduler. be careful not to introduce deadlocks - * we don't free the old io scheduler, before we have allocated what we @@ -958,10 +992,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) bool old_registered = false; int err; - if (q->mq_ops) { - blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); - } + if (q->mq_ops) + return elevator_switch_mq(q, new_e); /* * Turn on BYPASS and drain all requests w/ elevator private data. @@ -973,11 +1005,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) if (old) { old_registered = old->registered; - if (old->uses_mq) - blk_mq_sched_teardown(q); - - if (!q->mq_ops) - blk_queue_bypass_start(q); + blk_queue_bypass_start(q); /* unregister and clear all auxiliary data of the old elevator */ if (old_registered) @@ -987,56 +1015,32 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) } /* allocate, init and register new elevator */ - if (new_e) { - if (new_e->uses_mq) { - err = blk_mq_sched_setup(q); - if (!err) - err = new_e->ops.mq.init_sched(q, new_e); - } else - err = new_e->ops.sq.elevator_init_fn(q, new_e); - if (err) - goto fail_init; + err = new_e->ops.sq.elevator_init_fn(q, new_e); + if (err) + goto fail_init; - err = elv_register_queue(q); - if (err) - goto fail_register; - } else - q->elevator = NULL; + err = elv_register_queue(q); + if (err) + goto fail_register; /* done, kill the old one and finish */ if (old) { - elevator_exit(old); - if (!q->mq_ops) - blk_queue_bypass_end(q); + elevator_exit(q, old); + blk_queue_bypass_end(q); } - if (q->mq_ops) { - blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); - } - - if (new_e) - blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); - else - blk_add_trace_msg(q, "elv switch: none"); + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); return 0; fail_register: - if (q->mq_ops) - blk_mq_sched_teardown(q); - elevator_exit(q->elevator); + elevator_exit(q, q->elevator); fail_init: /* switch failed, restore and re-register old elevator */ if (old) { q->elevator = old; elv_register_queue(q); - if (!q->mq_ops) - blk_queue_bypass_end(q); - } - if (q->mq_ops) { - blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); + blk_queue_bypass_end(q); } return err; @@ -1094,12 +1098,20 @@ int elevator_change(struct request_queue *q, const char *name) } EXPORT_SYMBOL(elevator_change); +static inline bool elv_support_iosched(struct request_queue *q) +{ + if (q->mq_ops && q->tag_set && (q->tag_set->flags & + BLK_MQ_F_NO_SCHED)) + return false; + return true; +} + ssize_t elv_iosched_store(struct request_queue *q, const char *name, size_t count) { int ret; - if (!(q->mq_ops || q->request_fn)) + if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q)) return count; ret = __elevator_change(q, name); @@ -1131,7 +1143,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) len += sprintf(name+len, "[%s] ", elv->elevator_name); continue; } - if (__e->uses_mq && q->mq_ops) + if (__e->uses_mq && q->mq_ops && elv_support_iosched(q)) len += sprintf(name+len, "%s ", __e->elevator_name); else if (!__e->uses_mq && !q->mq_ops) len += sprintf(name+len, "%s ", __e->elevator_name); |