summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bio.c43
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/blk-flush.c4
-rw-r--r--block/blk-iolatency.c1
-rw-r--r--block/blk-mq.c24
-rw-r--r--block/blk-mq.h11
-rw-r--r--block/blk-sysfs.c12
7 files changed, 57 insertions, 47 deletions
diff --git a/block/bio.c b/block/bio.c
index 71a78d9fb8b7..b64cedc7f87c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
size = bio_add_page(bio, bv->bv_page, len,
bv->bv_offset + iter->iov_offset);
if (size == len) {
- struct page *page;
- int i;
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+ struct page *page;
+ int i;
+
+ mp_bvec_for_each_page(page, bv, i)
+ get_page(page);
+ }
- /*
- * For the normal O_DIRECT case, we could skip grabbing this
- * reference and then not have to put them again when IO
- * completes. But this breaks some in-kernel users, like
- * splicing to/from a loop device, where we release the pipe
- * pages unconditionally. If we can fix that case, we can
- * get rid of the get here and the need to call
- * bio_release_pages() at IO completion time.
- */
- mp_bvec_for_each_page(page, bv, i)
- get_page(page);
iov_iter_advance(iter, size);
return 0;
}
@@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* This takes either an iterator pointing to user memory, or one pointing to
* kernel pages (BVEC iterator). If we're adding user pages, we pin them and
* map them into the kernel. On IO completion, the caller should put those
- * pages. For now, when adding kernel pages, we still grab a reference to the
- * page. This isn't strictly needed for the common case, but some call paths
- * end up releasing pages from eg a pipe and we can't easily control these.
- * See comment in __bio_iov_bvec_add_pages().
+ * pages. If we're adding kernel pages, and the caller told us it's safe to
+ * do so, we just have to add the pages to the bio directly. We don't grab an
+ * extra reference to those pages (the user should already have that), and we
+ * don't put the page on IO completion. The caller needs to check if the bio is
+ * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
+ * released.
*
* The function tries, but does not guarantee, to pin as many pages as
* fit into the bio, or are requested in *iter, whatever is smaller. If
@@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
const bool is_bvec = iov_iter_is_bvec(iter);
unsigned short orig_vcnt = bio->bi_vcnt;
+ /*
+ * If this is a BVEC iter, then the pages are kernel pages. Don't
+ * release them on IO completion, if the caller asked us to.
+ */
+ if (is_bvec && iov_iter_bvec_no_ref(iter))
+ bio_set_flag(bio, BIO_NO_PAGE_REF);
+
do {
int ret;
@@ -1696,7 +1699,8 @@ static void bio_dirty_fn(struct work_struct *work)
next = bio->bi_private;
bio_set_pages_dirty(bio);
- bio_release_pages(bio);
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ bio_release_pages(bio);
bio_put(bio);
}
}
@@ -1713,7 +1717,8 @@ void bio_check_pages_dirty(struct bio *bio)
goto defer;
}
- bio_release_pages(bio);
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ bio_release_pages(bio);
bio_put(bio);
return;
defer:
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 77f37ef8ef06..617a2b3f7582 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1736,8 +1736,8 @@ out:
/**
* blkcg_schedule_throttle - this task needs to check for throttling
- * @q - the request queue IO was submitted on
- * @use_memdelay - do we charge this to memory delay for PSI
+ * @q: the request queue IO was submitted on
+ * @use_memdelay: do we charge this to memory delay for PSI
*
* This is called by the IO controller when we know there's delay accumulated
* for the blkg for this task. We do not pass the blkg because there are places
@@ -1769,8 +1769,9 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
/**
* blkcg_add_delay - add delay to this blkg
- * @now - the current time in nanoseconds
- * @delta - how many nanoseconds of delay to add
+ * @blkg: blkg of interest
+ * @now: the current time in nanoseconds
+ * @delta: how many nanoseconds of delay to add
*
* Charge @delta to the blkg's current delay accumulation. This is used to
* throttle tasks if an IO controller thinks we need more throttling.
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6e0f2d97fc6d..d95f94892015 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
flush_rq->tag = -1;
} else {
- blk_mq_put_driver_tag_hctx(hctx, flush_rq);
+ blk_mq_put_driver_tag(flush_rq);
flush_rq->internal_tag = -1;
}
@@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
if (q->elevator) {
WARN_ON(rq->tag < 0);
- blk_mq_put_driver_tag_hctx(hctx, rq);
+ blk_mq_put_driver_tag(rq);
}
/*
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 2620baa1f699..507212d75ee2 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -75,6 +75,7 @@
#include <linux/blk-mq.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
+#include "blk.h"
#define DEFAULT_SCALE_COOKIE 1000000U
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a9c181603cbd..3ff3d7b49969 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
}
/*
- * Check if any of the ctx's have pending work in this hardware queue
+ * Check if any of the ctx, dispatch list or elevator
+ * have pending work in this hardware queue.
*/
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
{
@@ -782,7 +783,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
if (kick_requeue_list)
blk_mq_kick_requeue_list(q);
}
-EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q)
{
@@ -1072,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
spin_lock(&hctx->dispatch_wait_lock);
- list_del_init(&wait->entry);
+ if (!list_empty(&wait->entry)) {
+ struct sbitmap_queue *sbq;
+
+ list_del_init(&wait->entry);
+ sbq = &hctx->tags->bitmap_tags;
+ atomic_dec(&sbq->ws_active);
+ }
spin_unlock(&hctx->dispatch_wait_lock);
blk_mq_run_hw_queue(hctx, true);
@@ -1088,13 +1094,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
+ struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
struct wait_queue_head *wq;
wait_queue_entry_t *wait;
bool ret;
if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) {
- if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
- set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+ blk_mq_sched_mark_restart_hctx(hctx);
/*
* It's possible that a tag was freed in the window between the
@@ -1111,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
if (!list_empty_careful(&wait->entry))
return false;
- wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait;
+ wq = &bt_wait_ptr(sbq, hctx)->wait;
spin_lock_irq(&wq->lock);
spin_lock(&hctx->dispatch_wait_lock);
@@ -1121,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
return false;
}
+ atomic_inc(&sbq->ws_active);
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
__add_wait_queue(wq, wait);
@@ -1141,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
* someone else gets the wakeup.
*/
list_del_init(&wait->entry);
+ atomic_dec(&sbq->ws_active);
spin_unlock(&hctx->dispatch_wait_lock);
spin_unlock_irq(&wq->lock);
@@ -2857,7 +2865,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/*
* Default to classic polling
*/
- q->poll_nsec = -1;
+ q->poll_nsec = BLK_MQ_POLL_CLASSIC;
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
blk_mq_add_queue_tag_set(set, q);
@@ -3392,7 +3400,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
{
struct request *rq;
- if (q->poll_nsec == -1)
+ if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
return false;
if (!blk_qc_t_is_internal(cookie))
diff --git a/block/blk-mq.h b/block/blk-mq.h
index c11353a3749d..d704fc7766f4 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -41,6 +41,8 @@ void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
+ bool kick_requeue_list);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
bool blk_mq_get_driver_tag(struct request *rq);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
@@ -222,15 +224,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
}
}
-static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- if (rq->tag == -1 || rq->internal_tag == -1)
- return;
-
- __blk_mq_put_driver_tag(hctx, rq);
-}
-
static inline void blk_mq_put_driver_tag(struct request *rq)
{
if (rq->tag == -1 || rq->internal_tag == -1)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 59685918167e..422327089e0f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -360,8 +360,8 @@ static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
{
int val;
- if (q->poll_nsec == -1)
- val = -1;
+ if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
+ val = BLK_MQ_POLL_CLASSIC;
else
val = q->poll_nsec / 1000;
@@ -380,10 +380,12 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
if (err < 0)
return err;
- if (val == -1)
- q->poll_nsec = -1;
- else
+ if (val == BLK_MQ_POLL_CLASSIC)
+ q->poll_nsec = BLK_MQ_POLL_CLASSIC;
+ else if (val >= 0)
q->poll_nsec = val * 1000;
+ else
+ return -EINVAL;
return count;
}