diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-04 07:58:06 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-04 07:58:06 -0700 |
commit | f459c34538f57661e0fd1d3eaf7c0b17125ae011 (patch) | |
tree | 3addc82d7f792c4533501978798dad0095293933 /lib | |
parent | 29dcea88779c856c7dc92040a0c01233263101d4 (diff) | |
parent | 32a50fabb334b2f0725de84bf248bd8c24c22b05 (diff) |
Merge tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
- clean up how we pass around gfp_t and
blk_mq_req_flags_t (Christoph)
- prepare us to defer scheduler attach (Christoph)
- clean up drivers handling of bounce buffers (Christoph)
- fix timeout handling corner cases (Christoph/Bart/Keith)
- bcache fixes (Coly)
- prep work for bcachefs and some block layer optimizations (Kent).
- convert users of bio_sets to using embedded structs (Kent).
- fixes for the BFQ io scheduler (Paolo/Davide/Filippo)
- lightnvm fixes and improvements (Matias, with contributions from Hans
and Javier)
- adding discard throttling to blk-wbt (me)
- sbitmap blk-mq-tag handling (me/Omar/Ming).
- remove the sparc jsflash block driver, acked by DaveM.
- Kyber scheduler improvement from Jianchao, making it more friendly
wrt merging.
- conversion of symbolic proc permissions to octal, from Joe Perches.
Previously the block parts were a mix of both.
- nbd fixes (Josef and Kevin Vigor)
- unify how we handle the various kinds of timestamps that the block
core and utility code uses (Omar)
- three NVMe pull requests from Keith and Christoph, bringing AEN to
feature completeness, file backed namespaces, cq/sq lock split, and
various fixes
- various little fixes and improvements all over the map
* tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block: (196 commits)
blk-mq: update nr_requests when switching to 'none' scheduler
block: don't use blocking queue entered for recursive bio submits
dm-crypt: fix warning in shutdown path
lightnvm: pblk: take bitmap alloc. out of critical section
lightnvm: pblk: kick writer on new flush points
lightnvm: pblk: only try to recover lines with written smeta
lightnvm: pblk: remove unnecessary bio_get/put
lightnvm: pblk: add possibility to set write buffer size manually
lightnvm: fix partial read error path
lightnvm: proper error handling for pblk_bio_add_pages
lightnvm: pblk: fix smeta write error path
lightnvm: pblk: garbage collect lines with failed writes
lightnvm: pblk: rework write error recovery path
lightnvm: pblk: remove dead function
lightnvm: pass flag on graceful teardown to targets
lightnvm: pblk: check for chunk size before allocating it
lightnvm: pblk: remove unnecessary argument
lightnvm: pblk: remove unnecessary indirection
lightnvm: pblk: return NVM_ error on failed submission
lightnvm: pblk: warn in case of corrupted write buffer
...
Diffstat (limited to 'lib')
-rw-r--r-- | lib/sbitmap.c | 113 |
1 files changed, 81 insertions, 32 deletions
diff --git a/lib/sbitmap.c b/lib/sbitmap.c index e6a9c06ec70c..6fdc6267f4a8 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -270,18 +270,33 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) } EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); -static unsigned int sbq_calc_wake_batch(unsigned int depth) +static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) { unsigned int wake_batch; + unsigned int shallow_depth; /* * For each batch, we wake up one queue. We need to make sure that our - * batch size is small enough that the full depth of the bitmap is - * enough to wake up all of the queues. + * batch size is small enough that the full depth of the bitmap, + * potentially limited by a shallow depth, is enough to wake up all of + * the queues. + * + * Each full word of the bitmap has bits_per_word bits, and there might + * be a partial word. There are depth / bits_per_word full words and + * depth % bits_per_word bits left over. In bitwise arithmetic: + * + * bits_per_word = 1 << shift + * depth / bits_per_word = depth >> shift + * depth % bits_per_word = depth & ((1 << shift) - 1) + * + * Each word can be limited to sbq->min_shallow_depth bits. */ - wake_batch = SBQ_WAKE_BATCH; - if (wake_batch > depth / SBQ_WAIT_QUEUES) - wake_batch = max(1U, depth / SBQ_WAIT_QUEUES); + shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); + depth = ((depth >> sbq->sb.shift) * shallow_depth + + min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); + wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, + SBQ_WAKE_BATCH); return wake_batch; } @@ -307,7 +322,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; } - sbq->wake_batch = sbq_calc_wake_batch(depth); + sbq->min_shallow_depth = UINT_MAX; + sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); @@ -327,21 +343,28 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth) { - unsigned int wake_batch = sbq_calc_wake_batch(depth); + unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); int i; if (sbq->wake_batch != wake_batch) { WRITE_ONCE(sbq->wake_batch, wake_batch); /* - * Pairs with the memory barrier in sbq_wake_up() to ensure that - * the batch size is updated before the wait counts. + * Pairs with the memory barrier in sbitmap_queue_wake_up() + * to ensure that the batch size is updated before the wait + * counts. */ smp_mb__before_atomic(); for (i = 0; i < SBQ_WAIT_QUEUES; i++) atomic_set(&sbq->ws[i].wait_cnt, 1); } +} + +void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) +{ + sbitmap_queue_update_wake_batch(sbq, depth); sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); @@ -380,6 +403,8 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, unsigned int hint, depth; int nr; + WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); + hint = this_cpu_read(*sbq->alloc_hint); depth = READ_ONCE(sbq->sb.depth); if (unlikely(hint >= depth)) { @@ -403,6 +428,14 @@ int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, } EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); +void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, + unsigned int min_shallow_depth) +{ + sbq->min_shallow_depth = min_shallow_depth; + sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); +} +EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); + static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) { int i, wake_index; @@ -425,52 +458,67 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) return NULL; } -static void sbq_wake_up(struct sbitmap_queue *sbq) +static bool __sbq_wake_up(struct sbitmap_queue *sbq) { struct sbq_wait_state *ws; unsigned int wake_batch; int wait_cnt; - /* - * Pairs with the memory barrier in set_current_state() to ensure the - * proper ordering of clear_bit()/waitqueue_active() in the waker and - * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the - * waiter. See the comment on waitqueue_active(). This is __after_atomic - * because we just did clear_bit_unlock() in the caller. - */ - smp_mb__after_atomic(); - ws = sbq_wake_ptr(sbq); if (!ws) - return; + return false; wait_cnt = atomic_dec_return(&ws->wait_cnt); if (wait_cnt <= 0) { + int ret; + wake_batch = READ_ONCE(sbq->wake_batch); + /* * Pairs with the memory barrier in sbitmap_queue_resize() to * ensure that we see the batch size update before the wait * count is reset. */ smp_mb__before_atomic(); + /* - * If there are concurrent callers to sbq_wake_up(), the last - * one to decrement the wait count below zero will bump it back - * up. If there is a concurrent resize, the count reset will - * either cause the cmpxchg to fail or overwrite after the - * cmpxchg. + * For concurrent callers of this, the one that failed the + * atomic_cmpxhcg() race should call this function again + * to wakeup a new batch on a different 'ws'. */ - atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); - sbq_index_atomic_inc(&sbq->wake_index); - wake_up_nr(&ws->wait, wake_batch); + ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); + if (ret == wait_cnt) { + sbq_index_atomic_inc(&sbq->wake_index); + wake_up_nr(&ws->wait, wake_batch); + return false; + } + + return true; } + + return false; +} + +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) +{ + while (__sbq_wake_up(sbq)) + ; } +EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { sbitmap_clear_bit_unlock(&sbq->sb, nr); - sbq_wake_up(sbq); + /* + * Pairs with the memory barrier in set_current_state() to ensure the + * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker + * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the + * waiter. See the comment on waitqueue_active(). + */ + smp_mb__after_atomic(); + sbitmap_queue_wake_up(sbq); + if (likely(!sbq->round_robin && nr < sbq->sb.depth)) *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; } @@ -482,7 +530,7 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) /* * Pairs with the memory barrier in set_current_state() like in - * sbq_wake_up(). + * sbitmap_queue_wake_up(). */ smp_mb(); wake_index = atomic_read(&sbq->wake_index); @@ -528,5 +576,6 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) seq_puts(m, "}\n"); seq_printf(m, "round_robin=%d\n", sbq->round_robin); + seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_show); |