From c9f5f3aa19c617fe85085b19abbf7a9a077336d0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 7 Feb 2024 14:14:28 +0000 Subject: block: extend bio caching to task context bio_put_percpu_cache() puts all non-iopoll bios into the irq-safe list, which entails disabling irqs. The overhead of that is not that bad when interrupts are already off but gets worse otherwise. We can optimise it when we're in the task context by using ->free_list directly just as the IOPOLL path does. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4774e1a0f905f96c63174b0f3e4f79f0d9b63246.1707314970.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- block/bio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'block/bio.c') diff --git a/block/bio.c b/block/bio.c index b9642a41f286..8da941974f88 100644 --- a/block/bio.c +++ b/block/bio.c @@ -770,8 +770,9 @@ static inline void bio_put_percpu_cache(struct bio *bio) bio_uninit(bio); - if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) { + if (in_task()) { bio->bi_next = cache->free_list; + /* Not necessary but helps not to iopoll already freed bios */ bio->bi_bdev = NULL; cache->free_list = bio; cache->nr++; -- cgit v1.2.3 From e516c3fc6c182736aec5418a73f15199640491e2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 7 Feb 2024 14:14:29 +0000 Subject: block: optimise in irq bio put caching When enlisting a bio into ->free_list_irq we protect the list by disabling irqs. It's likely they're already disabled and performance of local_irq_{save,restore}() is decent, but it's not zero cost. Let's only use the irq cache when we're serving a hard irq, which allows us to remove local_irq_{save,restore}(), and fall back to bio_free() in all remaining cases. Profiles indicate that the bio_put() cost is reduced by ~3.5 times (1.76% -> 0.49%), and total throughput of a CPU bound benchmark improves by around 1% (t/io_uring with high QD and several drives). 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/36d207540b7046c653cc16e5ff08fe7234b19f81.1707314970.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- block/bio.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'block/bio.c') diff --git a/block/bio.c b/block/bio.c index 8da941974f88..00847ff1415c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -762,30 +762,31 @@ static inline void bio_put_percpu_cache(struct bio *bio) struct bio_alloc_cache *cache; cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu()); - if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX) { - put_cpu(); - bio_free(bio); - return; - } - - bio_uninit(bio); + if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX) + goto out_free; if (in_task()) { + bio_uninit(bio); bio->bi_next = cache->free_list; /* Not necessary but helps not to iopoll already freed bios */ bio->bi_bdev = NULL; cache->free_list = bio; cache->nr++; - } else { - unsigned long flags; + } else if (in_hardirq()) { + lockdep_assert_irqs_disabled(); - local_irq_save(flags); + bio_uninit(bio); bio->bi_next = cache->free_list_irq; cache->free_list_irq = bio; cache->nr_irq++; - local_irq_restore(flags); + } else { + goto out_free; } put_cpu(); + return; +out_free: + put_cpu(); + bio_free(bio); } /** -- cgit v1.2.3 From 0eb4db4706603db09644ec3bc9bb0d63ea5d326c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 23 Feb 2024 07:59:09 -0800 Subject: block: io wait hang check helper This is the same in two places, and another will be added soon. Create a helper for it. 
Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240223155910.3622666-4-kbusch@meta.com Signed-off-by: Jens Axboe --- block/bio.c | 12 +----------- block/blk-mq.c | 19 +++---------------- block/blk.h | 13 +++++++++++++ 3 files changed, 17 insertions(+), 27 deletions(-) (limited to 'block/bio.c') diff --git a/block/bio.c b/block/bio.c index 00847ff1415c..496867b51609 100644 --- a/block/bio.c +++ b/block/bio.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -1371,21 +1370,12 @@ int submit_bio_wait(struct bio *bio) { DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_bdev->bd_disk->lockdep_map); - unsigned long hang_check; bio->bi_private = &done; bio->bi_end_io = submit_bio_wait_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); - - /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; - if (hang_check) - while (!wait_for_completion_io_timeout(&done, - hang_check * (HZ/2))) - ; - else - wait_for_completion_io(&done); + blk_wait_io(&done); return blk_status_to_errno(bio->bi_status); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 6abb4ce46baa..45f994c10044 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -1409,22 +1408,10 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head) blk_mq_insert_request(rq, at_head ? 
BLK_MQ_INSERT_AT_HEAD : 0); blk_mq_run_hw_queue(hctx, false); - if (blk_rq_is_poll(rq)) { + if (blk_rq_is_poll(rq)) blk_rq_poll_completion(rq, &wait.done); - } else { - /* - * Prevent hang_check timer from firing at us during very long - * I/O - */ - unsigned long hang_check = sysctl_hung_task_timeout_secs; - - if (hang_check) - while (!wait_for_completion_io_timeout(&wait.done, - hang_check * (HZ/2))) - ; - else - wait_for_completion_io(&wait.done); - } + else + blk_wait_io(&wait.done); return wait.ret; } diff --git a/block/blk.h b/block/blk.h index 7c30e2ac8ebc..6c2749d122ab 100644 --- a/block/blk.h +++ b/block/blk.h @@ -4,6 +4,7 @@ #include #include /* for max_pfn/max_low_pfn */ +#include #include #include #include "blk-crypto-internal.h" @@ -71,6 +72,18 @@ static inline int bio_queue_enter(struct bio *bio) return __bio_queue_enter(q, bio); } +static inline void blk_wait_io(struct completion *done) +{ + /* Prevent hang_check timer from firing at us during very long I/O */ + unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; + + if (timeout) + while (!wait_for_completion_io_timeout(done, timeout)) + ; + else + wait_for_completion_io(done); +} + #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); -- cgit v1.2.3 From 38b43539d64b2fa020b3b9a752a986769f87f7a6 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Thu, 29 Feb 2024 13:08:09 -0500 Subject: block: Fix page refcounts for unaligned buffers in __bio_release_pages() Fix an incorrect number of pages being released for buffers that do not start at the beginning of a page. 
Fixes: 1b151e2435fc ("block: Remove special-casing of compound pages") Cc: stable@vger.kernel.org Signed-off-by: Tony Battersby Tested-by: Greg Edwards Link: https://lore.kernel.org/r/86e592a9-98d4-4cff-a646-0c0084328356@cybernetics.com Signed-off-by: Jens Axboe --- block/bio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'block/bio.c') diff --git a/block/bio.c b/block/bio.c index 496867b51609..a8b691940027 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1153,7 +1153,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) bio_for_each_folio_all(fi, bio) { struct page *page; - size_t done = 0; + size_t nr_pages; if (mark_dirty) { folio_lock(fi.folio); @@ -1161,10 +1161,11 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) folio_unlock(fi.folio); } page = folio_page(fi.folio, fi.offset / PAGE_SIZE); + nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE - + fi.offset / PAGE_SIZE + 1; do { bio_release_page(bio, page++); - done += PAGE_SIZE; - } while (done < fi.length); + } while (--nr_pages != 0); } } EXPORT_SYMBOL_GPL(__bio_release_pages); -- cgit v1.2.3