diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bio.c | 43 | ||||
-rw-r--r-- | block/blk-cgroup.c | 9 | ||||
-rw-r--r-- | block/blk-flush.c | 4 | ||||
-rw-r--r-- | block/blk-iolatency.c | 1 | ||||
-rw-r--r-- | block/blk-mq.c | 24 | ||||
-rw-r--r-- | block/blk-mq.h | 11 | ||||
-rw-r--r-- | block/blk-sysfs.c | 12 |
7 files changed, 57 insertions, 47 deletions
diff --git a/block/bio.c b/block/bio.c index 71a78d9fb8b7..b64cedc7f87c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) size = bio_add_page(bio, bv->bv_page, len, bv->bv_offset + iter->iov_offset); if (size == len) { - struct page *page; - int i; + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { + struct page *page; + int i; + + mp_bvec_for_each_page(page, bv, i) + get_page(page); + } - /* - * For the normal O_DIRECT case, we could skip grabbing this - * reference and then not have to put them again when IO - * completes. But this breaks some in-kernel users, like - * splicing to/from a loop device, where we release the pipe - * pages unconditionally. If we can fix that case, we can - * get rid of the get here and the need to call - * bio_release_pages() at IO completion time. - */ - mp_bvec_for_each_page(page, bv, i) - get_page(page); iov_iter_advance(iter, size); return 0; } @@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) * This takes either an iterator pointing to user memory, or one pointing to * kernel pages (BVEC iterator). If we're adding user pages, we pin them and * map them into the kernel. On IO completion, the caller should put those - * pages. For now, when adding kernel pages, we still grab a reference to the - * page. This isn't strictly needed for the common case, but some call paths - * end up releasing pages from eg a pipe and we can't easily control these. - * See comment in __bio_iov_bvec_add_pages(). + * pages. If we're adding kernel pages, and the caller told us it's safe to + * do so, we just have to add the pages to the bio directly. We don't grab an + * extra reference to those pages (the user should already have that), and we + * don't put the page on IO completion. The caller needs to check if the bio is + * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be + * released. * * The function tries, but does not guarantee, to pin as many pages as * fit into the bio, or are requested in *iter, whatever is smaller. If @@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) const bool is_bvec = iov_iter_is_bvec(iter); unsigned short orig_vcnt = bio->bi_vcnt; + /* + * If this is a BVEC iter, then the pages are kernel pages. Don't + * release them on IO completion, if the caller asked us to. + */ + if (is_bvec && iov_iter_bvec_no_ref(iter)) + bio_set_flag(bio, BIO_NO_PAGE_REF); + do { int ret; @@ -1696,7 +1699,8 @@ static void bio_dirty_fn(struct work_struct *work) next = bio->bi_private; bio_set_pages_dirty(bio); - bio_release_pages(bio); + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + bio_release_pages(bio); bio_put(bio); } } @@ -1713,7 +1717,8 @@ void bio_check_pages_dirty(struct bio *bio) goto defer; } - bio_release_pages(bio); + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + bio_release_pages(bio); bio_put(bio); return; defer: diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 77f37ef8ef06..617a2b3f7582 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1736,8 +1736,8 @@ out: /** * blkcg_schedule_throttle - this task needs to check for throttling - * @q - the request queue IO was submitted on - * @use_memdelay - do we charge this to memory delay for PSI + * @q: the request queue IO was submitted on + * @use_memdelay: do we charge this to memory delay for PSI * * This is called by the IO controller when we know there's delay accumulated * for the blkg for this task. We do not pass the blkg because there are places @@ -1769,8 +1769,9 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) /** * blkcg_add_delay - add delay to this blkg - * @now - the current time in nanoseconds - * @delta - how many nanoseconds of delay to add + * @blkg: blkg of interest + * @now: the current time in nanoseconds + * @delta: how many nanoseconds of delay to add * * Charge @delta to the blkg's current delay accumulation. This is used to * throttle tasks if an IO controller thinks we need more throttling. diff --git a/block/blk-flush.c b/block/blk-flush.c index 6e0f2d97fc6d..d95f94892015 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); flush_rq->tag = -1; } else { - blk_mq_put_driver_tag_hctx(hctx, flush_rq); + blk_mq_put_driver_tag(flush_rq); flush_rq->internal_tag = -1; } @@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) if (q->elevator) { WARN_ON(rq->tag < 0); - blk_mq_put_driver_tag_hctx(hctx, rq); + blk_mq_put_driver_tag(rq); } /* diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 2620baa1f699..507212d75ee2 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -75,6 +75,7 @@ #include <linux/blk-mq.h> #include "blk-rq-qos.h" #include "blk-stat.h" +#include "blk.h" #define DEFAULT_SCALE_COOKIE 1000000U diff --git a/block/blk-mq.c b/block/blk-mq.c index a9c181603cbd..3ff3d7b49969 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) } /* - * Check if any of the ctx's have pending work in this hardware queue + * Check if any of the ctx, dispatch list or elevator + * have pending work in this hardware queue. */ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) { @@ -782,7 +783,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, if (kick_requeue_list) blk_mq_kick_requeue_list(q); } -EXPORT_SYMBOL(blk_mq_add_to_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q) { @@ -1072,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); spin_lock(&hctx->dispatch_wait_lock); - list_del_init(&wait->entry); + if (!list_empty(&wait->entry)) { + struct sbitmap_queue *sbq; + + list_del_init(&wait->entry); + sbq = &hctx->tags->bitmap_tags; + atomic_dec(&sbq->ws_active); + } spin_unlock(&hctx->dispatch_wait_lock); blk_mq_run_hw_queue(hctx, true); @@ -1088,13 +1094,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, struct request *rq) { + struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; struct wait_queue_head *wq; wait_queue_entry_t *wait; bool ret; if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + blk_mq_sched_mark_restart_hctx(hctx); /* * It's possible that a tag was freed in the window between the @@ -1111,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, if (!list_empty_careful(&wait->entry)) return false; - wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait; + wq = &bt_wait_ptr(sbq, hctx)->wait; spin_lock_irq(&wq->lock); spin_lock(&hctx->dispatch_wait_lock); @@ -1121,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, return false; } + atomic_inc(&sbq->ws_active); wait->flags &= ~WQ_FLAG_EXCLUSIVE; __add_wait_queue(wq, wait); @@ -1141,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, * someone else gets the wakeup. */ list_del_init(&wait->entry); + atomic_dec(&sbq->ws_active); spin_unlock(&hctx->dispatch_wait_lock); spin_unlock_irq(&wq->lock); @@ -2857,7 +2865,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, /* * Default to classic polling */ - q->poll_nsec = -1; + q->poll_nsec = BLK_MQ_POLL_CLASSIC; blk_mq_init_cpu_queues(q, set->nr_hw_queues); blk_mq_add_queue_tag_set(set, q); @@ -3392,7 +3400,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, { struct request *rq; - if (q->poll_nsec == -1) + if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) return false; if (!blk_qc_t_is_internal(cookie)) diff --git a/block/blk-mq.h b/block/blk-mq.h index c11353a3749d..d704fc7766f4 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -41,6 +41,8 @@ void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, + bool kick_requeue_list); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); bool blk_mq_get_driver_tag(struct request *rq); struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, @@ -222,15 +224,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, } } -static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - if (rq->tag == -1 || rq->internal_tag == -1) - return; - - __blk_mq_put_driver_tag(hctx, rq); -} - static inline void blk_mq_put_driver_tag(struct request *rq) { if (rq->tag == -1 || rq->internal_tag == -1) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 59685918167e..422327089e0f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -360,8 +360,8 @@ static ssize_t queue_poll_delay_show(struct request_queue *q, char *page) { int val; - if (q->poll_nsec == -1) - val = -1; + if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) + val = BLK_MQ_POLL_CLASSIC; else val = q->poll_nsec / 1000; @@ -380,10 +380,12 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page, if (err < 0) return err; - if (val == -1) - q->poll_nsec = -1; - else + if (val == BLK_MQ_POLL_CLASSIC) + q->poll_nsec = BLK_MQ_POLL_CLASSIC; + else if (val >= 0) q->poll_nsec = val * 1000; + else + return -EINVAL; return count; } |