Merge tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe: - NVMe pull requests via Christoph: - handle number of queue changes in the TCP and RDMA drivers (Daniel Wagner) - allow changing the number of queues in nvmet (Daniel Wagner) - also consider host_iface when checking ip options (Daniel Wagner) - don't map pages which can't come from HIGHMEM (Fabio M. De Francesco) - avoid unnecessary flush bios in nvmet (Guixin Liu) - shrink and better pack the nvme_iod structure (Keith Busch) - add comment for unaligned "fake" nqn (Linjun Bao) - print actual source IP address through sysfs "address" attr (Martin Belanger) - various cleanups (Jackie Liu, Wolfram Sang, Genjian Zhang) - handle effects after freeing the request (Keith Busch) - copy firmware_rev on each init (Keith Busch) - restrict management ioctls to admin (Keith Busch) - ensure subsystem reset is single threaded (Keith Busch) - report the actual number of tagset maps in nvme-pci (Keith Busch) - small fabrics authentication fixups (Christoph Hellwig) - add common code for tagset allocation and freeing (Christoph Hellwig) - stop using the request_queue in nvmet (Christoph Hellwig) - set min_align_mask before calculating max_hw_sectors (Rishabh Bhatnagar) - send a rediscover uevent when a persistent discovery controller reconnects (Sagi Grimberg) - misc nvmet-tcp fixes (Varun Prakash, zhenwei pi) - MD pull request via Song: - Various raid5 fix and clean up, by Logan Gunthorpe and David Sloan. - Raid10 performance optimization, by Yu Kuai. - sbitmap wakeup hang fixes (Hugh, Keith, Jan, Yu) - IO scheduler switching quisce fix (Keith) - s390/dasd block driver updates (Stefan) - support for recovery for the ublk driver (ZiyangZhang) - rnbd drivers fixes and updates (Guoqing, Santosh, ye, Christoph) - blk-mq and null_blk map fixes (Bart) - various bcache fixes (Coly, Jilin, Jules) - nbd signal hang fix (Shigeru) - block writeback throttling fix (Yu) - optimize the passthrough mapping handling (me) - prepare block cgroups to being gendisk based (Christoph) - get rid of an old PSI hack in the block layer, moving it to the callers instead where it belongs (Christoph) - blk-throttle fixes and cleanups (Yu) - misc fixes and cleanups (Liu Shixin, Liu Song, Miaohe, Pankaj, Ping-Xiang, Wolfram, Saurabh, Li Jinlin, Li Lei, Lin, Li zeming, Miaohe, Bart, Coly, Gaosheng * tag 'for-6.1/block-2022-10-03' of git://git.kernel.dk/linux: (162 commits) sbitmap: fix lockup while swapping block: add rationale for not using blk_mq_plug() when applicable block: adapt blk_mq_plug() to not plug for writes that require a zone lock s390/dasd: use blk_mq_alloc_disk blk-cgroup: don't update the blkg lookup hint in blkg_conf_prep nvmet: don't look at the request_queue in nvmet_bdev_set_limits nvmet: don't look at the request_queue in nvmet_bdev_zone_mgmt_emulate_all blk-mq: use quiesced elevator switch when reinitializing queues block: replace blk_queue_nowait with bdev_nowait nvme: remove nvme_ctrl_init_connect_q nvme-loop: use the tagset alloc/free helpers nvme-loop: store the generic nvme_ctrl in set->driver_data nvme-loop: initialize sqsize later nvme-fc: use the tagset alloc/free helpers nvme-fc: store the generic nvme_ctrl in set->driver_data nvme-fc: keep ctrl->sqsize in sync with opts->queue_size nvme-rdma: use the tagset alloc/free helpers nvme-rdma: store the generic nvme_ctrl in set->driver_data nvme-tcp: use the tagset alloc/free helpers nvme-tcp: store the generic nvme_ctrl in set->driver_data ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-10-07 09:19:14 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-10-07 09:19:14 -0700
commit: 513389809e138ae903b6ef43c1d5d2ffaf4dca17 (patch)
tree: c71e478fab1568da4706868b14eb67a75c148a8b /drivers/md/raid10.c
parent: 0a78a376ef3c2f3d397df48909f00cd75f92137a (diff)
parent: 30514bd2dd4e86a3ecfd6a93a3eadf7b9ea164a0 (diff)
1 files changed, 96 insertions, 55 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 64d6e4cd8a3a..3aa8b6e11d58 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
 
 #include "raid1-10.c"
 
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+	do { \
+		write_sequnlock_irq(&(conf)->resync_lock); \
+		cmd; \
+	} while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+		       cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+	wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
  * 'struct resync_pages'.
@@ -274,6 +289,12 @@ static void put_buf(struct r10bio *r10_bio)
 	lower_barrier(conf);
 }
 
+static void wake_up_barrier(struct r10conf *conf)
+{
+	if (wq_has_sleeper(&conf->wait_barrier))
+		wake_up(&conf->wait_barrier);
+}
+
 static void reschedule_retry(struct r10bio *r10_bio)
 {
 	unsigned long flags;
@@ -930,78 +951,101 @@ static void flush_pending_writes(struct r10conf *conf)
 
 static void raise_barrier(struct r10conf *conf, int force)
 {
+	write_seqlock_irq(&conf->resync_lock);
 	BUG_ON(force && !conf->barrier);
-	spin_lock_irq(&conf->resync_lock);
 
 	/* Wait until no block IO is waiting (unless 'force') */
-	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock);
+	wait_event_barrier(conf, force || !conf->nr_waiting);
 
 	/* block any new IO from starting */
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 
 	/* Now wait for all pending IO to complete */
-	wait_event_lock_irq(conf->wait_barrier,
-			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock);
+	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+				 conf->barrier < RESYNC_DEPTH);
 
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void lower_barrier(struct r10conf *conf)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->barrier--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+	write_seqlock_irqsave(&conf->resync_lock, flags);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
+	write_sequnlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
 
+static bool stop_waiting_barrier(struct r10conf *conf)
+{
+	struct bio_list *bio_list = current->bio_list;
+
+	/* barrier is dropped */
+	if (!conf->barrier)
+		return true;
+
+	/*
+	 * If there are already pending requests (preventing the barrier from
+	 * rising completely), and the pre-process bio queue isn't empty, then
+	 * don't wait, as we need to empty that queue to get the nr_pending
+	 * count down.
+	 */
+	if (atomic_read(&conf->nr_pending) && bio_list &&
+	    (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
+		return true;
+
+	/* move on if recovery thread is blocked by us */
+	if (conf->mddev->thread->tsk == current &&
+	    test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
+	    conf->nr_queued > 0)
+		return true;
+
+	return false;
+}
+
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+	unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+	if (READ_ONCE(conf->barrier))
+		return false;
+
+	atomic_inc(&conf->nr_pending);
+	if (!read_seqretry(&conf->resync_lock, seq))
+		return true;
+
+	if (atomic_dec_and_test(&conf->nr_pending))
+		wake_up_barrier(conf);
+
+	return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
 	bool ret = true;
 
-	spin_lock_irq(&conf->resync_lock);
+	if (wait_barrier_nolock(conf))
+		return true;
+
+	write_seqlock_irq(&conf->resync_lock);
 	if (conf->barrier) {
-		struct bio_list *bio_list = current->bio_list;
-		conf->nr_waiting++;
-		/* Wait for the barrier to drop.
-		 * However if there are already pending
-		 * requests (preventing the barrier from
-		 * rising completely), and the
-		 * pre-process bio queue isn't empty,
-		 * then don't wait, as we need to empty
-		 * that queue to get the nr_pending
-		 * count down.
-		 */
 		/* Return false when nowait flag is set */
 		if (nowait) {
 			ret = false;
 		} else {
+			conf->nr_waiting++;
 			raid10_log(conf->mddev, "wait barrier");
-			wait_event_lock_irq(conf->wait_barrier,
-					    !conf->barrier ||
-					    (atomic_read(&conf->nr_pending) &&
-					     bio_list &&
-					     (!bio_list_empty(&bio_list[0]) ||
-					      !bio_list_empty(&bio_list[1]))) ||
-					     /* move on if recovery thread is
-					      * blocked by us
-					      */
-					     (conf->mddev->thread->tsk == current &&
-					      test_bit(MD_RECOVERY_RUNNING,
-						       &conf->mddev->recovery) &&
-					      conf->nr_queued > 0),
-					    conf->resync_lock);
+			wait_event_barrier(conf, stop_waiting_barrier(conf));
+			conf->nr_waiting--;
 		}
-		conf->nr_waiting--;
 		if (!conf->nr_waiting)
 			wake_up(&conf->wait_barrier);
 	}
 	/* Only increment nr_pending when we wait */
 	if (ret)
 		atomic_inc(&conf->nr_pending);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 	return ret;
 }
 
@@ -1009,7 +1053,7 @@ static void allow_barrier(struct r10conf *conf)
 {
 	if ((atomic_dec_and_test(&conf->nr_pending)) ||
 			(conf->array_freeze_pending))
-		wake_up(&conf->wait_barrier);
+		wake_up_barrier(conf);
 }
 
 static void freeze_array(struct r10conf *conf, int extra)
@@ -1026,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
 	 * must match the number of pending IOs (nr_pending) before
 	 * we continue.
 	 */
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	conf->array_freeze_pending++;
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 	conf->nr_waiting++;
-	wait_event_lock_irq_cmd(conf->wait_barrier,
-				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-				conf->resync_lock,
-				flush_pending_writes(conf));
-
+	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+			conf->nr_queued + extra, flush_pending_writes(conf));
 	conf->array_freeze_pending--;
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void unfreeze_array(struct r10conf *conf)
 {
 	/* reverse the effect of the freeze */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
+	write_seqlock_irq(&conf->resync_lock);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
 	conf->nr_waiting--;
 	wake_up(&conf->wait_barrier);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -1885,7 +1926,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	__make_request(mddev, bio, sectors);
 
 	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	wake_up_barrier(conf);
 	return true;
 }
 
@@ -1980,7 +2021,7 @@ static int enough(struct r10conf *conf, int ignore)
  * Otherwise, it must be degraded:
  *	- recovery is interrupted.
  *	- &mddev->degraded is bumped.
-
+ *
  * @rdev is marked as &Faulty excluding case when array is failed and
  * &mddev->fail_last_dev is off.
  */
@@ -4032,7 +4073,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 	INIT_LIST_HEAD(&conf->bio_end_io_list);
 
-	spin_lock_init(&conf->resync_lock);
+	seqlock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 	atomic_set(&conf->nr_pending, 0);
 
@@ -4351,7 +4392,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
 				rdev->new_raid_disk = rdev->raid_disk * 2;
 				rdev->sectors = size;
 			}
-		conf->barrier = 1;
+		WRITE_ONCE(conf->barrier, 1);
 	}
 
 	return conf;
author	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-07 09:19:14 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-07 09:19:14 -0700
commit	513389809e138ae903b6ef43c1d5d2ffaf4dca17 (patch)
tree	c71e478fab1568da4706868b14eb67a75c148a8b /drivers/md/raid10.c
parent	0a78a376ef3c2f3d397df48909f00cd75f92137a (diff)
parent	30514bd2dd4e86a3ecfd6a93a3eadf7b9ea164a0 (diff)