From ea6e7ceedaf11e1bad3ff21e8624694d696d276b Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 18 Jan 2022 19:10:11 -0700
Subject: io-wq: remove useless 'work' argument to __io_worker_busy()

We don't use 'work' anymore in the busy logic, so remove the dead
argument.

Signed-off-by: Jens Axboe
---
 fs/io-wq.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5c4f582d6549..f8a5f172b9eb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -405,8 +405,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
  * Worker will start processing some work. Move it to the busy list, if
  * it's currently on the freelist
  */
-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
-			     struct io_wq_work *work)
+static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
 	__must_hold(wqe->lock)
 {
 	if (worker->flags & IO_WORKER_F_FREE) {
@@ -556,7 +555,7 @@ get_next:
 		 */
 		work = io_get_next_work(acct, worker);
 		if (work)
-			__io_worker_busy(wqe, worker, work);
+			__io_worker_busy(wqe, worker);
 
 		raw_spin_unlock(&wqe->lock);
 		if (!work)

From 081b58204629eff9dd93e7f68ed15c8aa6452a4b Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 18 Jan 2022 19:13:43 -0700
Subject: io-wq: make io_worker lock a raw spinlock

In preparation for nesting it under the wqe lock (which is raw due to
being acquired from the scheduler side), change the io_worker lock from
a normal spinlock to a raw spinlock.

Signed-off-by: Jens Axboe
---
 fs/io-wq.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index f8a5f172b9eb..c369910de793 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -48,7 +48,7 @@ struct io_worker {
 	struct io_wqe *wqe;
 
 	struct io_wq_work *cur_work;
-	spinlock_t lock;
+	raw_spinlock_t lock;
 
 	struct completion ref_done;
 
@@ -528,9 +528,9 @@ static void io_assign_current_work(struct io_worker *worker,
 		cond_resched();
 	}
 
-	spin_lock(&worker->lock);
+	raw_spin_lock(&worker->lock);
 	worker->cur_work = work;
-	spin_unlock(&worker->lock);
+	raw_spin_unlock(&worker->lock);
 }
 
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -814,7 +814,7 @@ fail:
 	refcount_set(&worker->ref, 1);
 	worker->wqe = wqe;
-	spin_lock_init(&worker->lock);
+	raw_spin_lock_init(&worker->lock);
 	init_completion(&worker->ref_done);
 
 	if (index == IO_WQ_ACCT_BOUND)
@@ -980,13 +980,13 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 	 * Hold the lock to avoid ->cur_work going out of scope, caller
 	 * may dereference the passed in work.
 	 */
-	spin_lock(&worker->lock);
+	raw_spin_lock(&worker->lock);
 	if (worker->cur_work &&
 	    match->fn(worker->cur_work, match->data)) {
 		set_notify_signal(worker->task);
 		match->nr_running++;
 	}
-	spin_unlock(&worker->lock);
+	raw_spin_unlock(&worker->lock);
 
 	return match->nr_running && !match->cancel_all;
 }

From 36e4c58bf044b07204c8c7e6dd7c2384e439921a Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 18 Jan 2022 19:18:20 -0700
Subject: io-wq: invoke work cancelation with wqe->lock held

io_wqe_cancel_pending_work() grabs the wqe->lock internally; grab it
upfront in the caller instead. For the running work cancelation, grab
the lock around that call as well.
Signed-off-by: Jens Axboe
---
 fs/io-wq.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index c369910de793..a92fbdc8bea3 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1038,17 +1038,16 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
 {
 	int i;
 retry:
-	raw_spin_lock(&wqe->lock);
 	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
 		struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
 
 		if (io_acct_cancel_pending_work(wqe, acct, match)) {
+			raw_spin_lock(&wqe->lock);
 			if (match->cancel_all)
 				goto retry;
-			return;
+			break;
 		}
 	}
-	raw_spin_unlock(&wqe->lock);
 }
 
 static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1077,7 +1076,9 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
+		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_pending_work(wqe, &match);
+		raw_spin_unlock(&wqe->lock);
 		if (match.nr_pending && !match.cancel_all)
 			return IO_WQ_CANCEL_OK;
 	}
@@ -1091,7 +1092,9 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
+		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_running_work(wqe, &match);
+		raw_spin_unlock(&wqe->lock);
 		if (match.nr_running && !match.cancel_all)
 			return IO_WQ_CANCEL_RUNNING;
 	}
@@ -1262,7 +1265,9 @@ static void io_wq_destroy(struct io_wq *wq)
 			.fn		= io_wq_work_match_all,
 			.cancel_all	= true,
 		};
+		raw_spin_lock(&wqe->lock);
 		io_wqe_cancel_pending_work(wqe, &match);
+		raw_spin_unlock(&wqe->lock);
 		free_cpumask_var(wqe->cpu_mask);
 		kfree(wqe);
 	}

From efdf518459b17e18a93c7c9cb622fd3051dabd0c Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 18 Jan 2022 19:22:32 -0700
Subject: io-wq: perform both unstarted and started work cancelations in one go

Rather than split these into two separate lookups and matches, combine
them into one loop. This will become important once we can guarantee
that there is no window in which a pending work item is discoverable in
neither state.

Signed-off-by: Jens Axboe
---
 fs/io-wq.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index a92fbdc8bea3..db150186ce94 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1072,27 +1072,25 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 	 * First check pending list, if we're lucky we can just remove it
 	 * from there. CANCEL_OK means that the work is returned as-new,
 	 * no completion will be posted for it.
-	 */
-	for_each_node(node) {
-		struct io_wqe *wqe = wq->wqes[node];
-
-		raw_spin_lock(&wqe->lock);
-		io_wqe_cancel_pending_work(wqe, &match);
-		raw_spin_unlock(&wqe->lock);
-		if (match.nr_pending && !match.cancel_all)
-			return IO_WQ_CANCEL_OK;
-	}
-
-	/*
-	 * Now check if a free (going busy) or busy worker has the work
+	 *
+	 * Then check if a free (going busy) or busy worker has the work
 	 * currently running. If we find it there, we'll return CANCEL_RUNNING
 	 * as an indication that we attempt to signal cancellation. The
 	 * completion will run normally in this case.
+	 *
+	 * Do both of these while holding the wqe->lock, to ensure that
+	 * we'll find a work item regardless of state.
 	 */
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
 		raw_spin_lock(&wqe->lock);
+		io_wqe_cancel_pending_work(wqe, &match);
+		if (match.nr_pending && !match.cancel_all) {
+			raw_spin_unlock(&wqe->lock);
+			return IO_WQ_CANCEL_OK;
+		}
+
 		io_wqe_cancel_running_work(wqe, &match);
 		raw_spin_unlock(&wqe->lock);
 		if (match.nr_running && !match.cancel_all)

From 361aee450c6e36c8dbab712c94a8a7835bd92e25 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 18 Jan 2022 19:23:51 -0700
Subject: io-wq: add intermediate work step between pending list and active work

We have a gap between when a worker removes an item from the work list
and when it gets added as the worker's active work. In this state, the
work item cannot be found by cancelations. This is a small window, but
it does exist.

Add a temporary pointer to a work item that isn't on the pending work
list anymore, but also isn't yet the active work. This is needed as we
need to drop the wqe lock in between grabbing the work item and marking
it as active, to ensure that signal based cancelations are properly
ordered.

Reported-by: Florian Fischer
Link: https://lore.kernel.org/io-uring/20220118151337.fac6cthvbnu7icoc@pasture/
Signed-off-by: Jens Axboe
---
 fs/io-wq.c | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index db150186ce94..1efb134c98b7 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -48,6 +48,7 @@ struct io_worker {
 	struct io_wqe *wqe;
 
 	struct io_wq_work *cur_work;
+	struct io_wq_work *next_work;
 	raw_spinlock_t lock;
 
 	struct completion ref_done;
@@ -530,6 +531,7 @@ static void io_assign_current_work(struct io_worker *worker,
 
 	raw_spin_lock(&worker->lock);
 	worker->cur_work = work;
+	worker->next_work = NULL;
 	raw_spin_unlock(&worker->lock);
 }
 
@@ -554,9 +556,20 @@ get_next:
 		 * clear the stalled flag.
 		 */
 		work = io_get_next_work(acct, worker);
-		if (work)
+		if (work) {
 			__io_worker_busy(wqe, worker);
 
+			/*
+			 * Make sure cancelation can find this, even before
+			 * it becomes the active work. That avoids a window
+			 * where the work has been removed from our general
+			 * work list, but isn't yet discoverable as the
+			 * current work item for this worker.
+			 */
+			raw_spin_lock(&worker->lock);
+			worker->next_work = work;
+			raw_spin_unlock(&worker->lock);
+		}
 		raw_spin_unlock(&wqe->lock);
 		if (!work)
 			break;
@@ -972,6 +985,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
 	work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
 }
 
+static bool __io_wq_worker_cancel(struct io_worker *worker,
+				  struct io_cb_cancel_data *match,
+				  struct io_wq_work *work)
+{
+	if (work && match->fn(work, match->data)) {
+		work->flags |= IO_WQ_WORK_CANCEL;
+		set_notify_signal(worker->task);
+		return true;
+	}
+
+	return false;
+}
+
 static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
 	struct io_cb_cancel_data *match = data;
@@ -981,11 +1007,9 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 	 * may dereference the passed in work.
 	 */
 	raw_spin_lock(&worker->lock);
-	if (worker->cur_work &&
-	    match->fn(worker->cur_work, match->data)) {
-		set_notify_signal(worker->task);
+	if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
+	    __io_wq_worker_cancel(worker, match, worker->next_work))
 		match->nr_running++;
-	}
 	raw_spin_unlock(&worker->lock);
 
 	return match->nr_running && !match->cancel_all;

From 73031f761cb7c2397d73957d14d041c31fe58c34 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 19 Jan 2022 13:11:58 -0700
Subject: io-wq: delete dead lock shuffling code

We used to have more code around the work loop, but now the goto and
lock juggling just make it less readable than it should be. Get rid of
it.

Signed-off-by: Jens Axboe
---
 fs/io-wq.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 1efb134c98b7..013e12b9fabf 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -547,7 +547,7 @@ static void io_worker_handle_work(struct io_worker *worker)
 
 	do {
 		struct io_wq_work *work;
-get_next:
+
 		/*
 		 * If we got some work, mark us as busy. If we didn't, but
 		 * the list isn't empty, it means we stalled on hashed work.
@@ -606,11 +606,6 @@ get_next:
 			spin_unlock_irq(&wq->hash->wait.lock);
 			if (wq_has_sleeper(&wq->hash->wait))
 				wake_up(&wq->hash->wait);
-			raw_spin_lock(&wqe->lock);
-			/* skip unnecessary unlock-lock wqe->lock */
-			if (!work)
-				goto get_next;
-			raw_spin_unlock(&wqe->lock);
 		}
 	} while (work);
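
As a rough illustration of the next_work pattern introduced in the "add
intermediate work step" patch above, here is a minimal user-space sketch
built on plain pthreads rather than the kernel primitives. The names
(struct worker_ctx, worker_take(), worker_cancel()) are invented for this
example and are not the io-wq API; the point is only that the dequeued item
is published under the worker lock before it becomes cur_work, so a
canceler that checks both slots can always find it.

/*
 * Illustrative only: models the cur_work/next_work idea with pthreads.
 * Not kernel code; all names are made up for this sketch.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct work_item {
	bool canceled;
};

struct worker_ctx {
	pthread_mutex_t lock;		/* plays the role of worker->lock */
	struct work_item *cur_work;	/* item currently being executed */
	struct work_item *next_work;	/* dequeued, but not yet current */
};

/* Worker side: publish the dequeued item before it becomes cur_work. */
static void worker_take(struct worker_ctx *w, struct work_item *item)
{
	pthread_mutex_lock(&w->lock);
	w->next_work = item;		/* findable by cancelers from here on */
	pthread_mutex_unlock(&w->lock);

	/* other locks may be dropped and retaken here; item stays findable */

	pthread_mutex_lock(&w->lock);
	w->cur_work = item;
	w->next_work = NULL;
	pthread_mutex_unlock(&w->lock);
}

/* Canceler side: check both slots, as io_wq_worker_cancel() now does. */
static bool worker_cancel(struct worker_ctx *w, struct work_item *target)
{
	bool hit = false;

	pthread_mutex_lock(&w->lock);
	if (w->cur_work == target || w->next_work == target) {
		target->canceled = true;
		hit = true;
	}
	pthread_mutex_unlock(&w->lock);
	return hit;
}

int main(void)
{
	struct worker_ctx w = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct work_item item = { .canceled = false };

	worker_take(&w, &item);
	printf("cancel hit=%d canceled=%d\n",
	       worker_cancel(&w, &item), item.canceled);
	return 0;
}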