xfs: reduce exclusive locking on unaligned dio

Attempt shared locking for unaligned DIO, but only if the the underlying extent is already allocated and in written state. On failure, retry with the existing exclusive locking. Test case is fio randrw of 512 byte IOs using AIO and an iodepth of 32 IOs. Vanilla: READ: bw=4560KiB/s (4670kB/s), 4560KiB/s-4560KiB/s (4670kB/s-4670kB/s), io=134MiB (140MB), run=30001-30001msec WRITE: bw=4567KiB/s (4676kB/s), 4567KiB/s-4567KiB/s (4676kB/s-4676kB/s), io=134MiB (140MB), run=30001-30001msec Patched: READ: bw=37.6MiB/s (39.4MB/s), 37.6MiB/s-37.6MiB/s (39.4MB/s-39.4MB/s), io=1127MiB (1182MB), run=30002-30002msec WRITE: bw=37.6MiB/s (39.4MB/s), 37.6MiB/s-37.6MiB/s (39.4MB/s-39.4MB/s), io=1128MiB (1183MB), run=30002-30002msec That's an improvement from ~18k IOPS to a ~150k IOPS, which is about the IOPS limit of the VM block device setup I'm testing on. 4kB block IO comparison: READ: bw=296MiB/s (310MB/s), 296MiB/s-296MiB/s (310MB/s-310MB/s), io=8868MiB (9299MB), run=30002-30002msec WRITE: bw=296MiB/s (310MB/s), 296MiB/s-296MiB/s (310MB/s-310MB/s), io=8878MiB (9309MB), run=30002-30002msec Which is ~150k IOPS, same as what the test gets for sub-block AIO+DIO writes with this patch. Signed-off-by: Dave Chinner <dchinner@redhat.com> [hch: rebased, split unaligned from nowait] Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
author: Dave Chinner <dchinner@redhat.com> 2021-01-23 10:06:31 -0800
committer: Darrick J. Wong <djwong@kernel.org> 2021-02-01 09:47:19 -0800
commit: ed1128c2d0c87e5ff49c40f5529f06bc35f4251b (patch)
tree: 599ec5d4c13fc28a9cdec1254605c7629c315681 /fs/xfs/xfs_file.c
parent: caa89dbc4303d9bc7caa6c1124a84aa3efa47e00 (diff)
1 files changed, 45 insertions, 13 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c60ff7b5dd82..39695b59dfcc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -544,10 +544,13 @@ out_unlock:
  * to do sub-block zeroing and that requires serialisation against other direct
  * I/O to the same block.  In this case we need to serialise the submission of
  * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
+ * In the case where sub-block zeroing is not required, we can do concurrent
+ * sub-block dios to the same block successfully.
  *
- * This means that unaligned dio writes always block. There is no "nowait" fast
- * path in this code - if IOCB_NOWAIT is set we simply return -EAGAIN up front
- * and we don't have to worry about that anymore.
+ * Optimistically submit the I/O using the shared lock first, but use the
+ * IOMAP_DIO_OVERWRITE_ONLY flag to tell the lower layers to return -EAGAIN
+ * if block allocation or partial block zeroing would be required.  In that case
+ * we try again with the exclusive lock.
  */
 static noinline ssize_t
 xfs_file_dio_write_unaligned(
@@ -555,13 +558,28 @@ xfs_file_dio_write_unaligned(
 	struct kiocb		*iocb,
 	struct iov_iter		*from)
 {
-	int			iolock = XFS_IOLOCK_EXCL;
+	size_t			isize = i_size_read(VFS_I(ip));
+	size_t			count = iov_iter_count(from);
+	int			iolock = XFS_IOLOCK_SHARED;
+	unsigned int		flags = IOMAP_DIO_OVERWRITE_ONLY;
 	ssize_t			ret;
 
-	/* unaligned dio always waits, bail */
-	if (iocb->ki_flags & IOCB_NOWAIT)
-		return -EAGAIN;
-	xfs_ilock(ip, iolock);
+	/*
+	 * Extending writes need exclusivity because of the sub-block zeroing
+	 * that the DIO code always does for partial tail blocks beyond EOF, so
+	 * don't even bother trying the fast path in this case.
+	 */
+	if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
+retry_exclusive:
+		if (iocb->ki_flags & IOCB_NOWAIT)
+			return -EAGAIN;
+		iolock = XFS_IOLOCK_EXCL;
+		flags = IOMAP_DIO_FORCE_WAIT;
+	}
+
+	ret = xfs_ilock_iocb(iocb, iolock);
+	if (ret)
+		return ret;
 
 	/*
 	 * We can't properly handle unaligned direct I/O to reflink files yet,
@@ -578,15 +596,29 @@ xfs_file_dio_write_unaligned(
 		goto out_unlock;
 
 	/*
-	 * If we are doing unaligned I/O, this must be the only I/O in-flight.
-	 * Otherwise we risk data corruption due to unwritten extent conversions
-	 * from the AIO end_io handler.  Wait for all other I/O to drain first.
+	 * If we are doing exclusive unaligned I/O, this must be the only I/O
+	 * in-flight.  Otherwise we risk data corruption due to unwritten extent
+	 * conversions from the AIO end_io handler.  Wait for all other I/O to
+	 * drain first.
 	 */
-	inode_dio_wait(VFS_I(ip));
+	if (flags & IOMAP_DIO_FORCE_WAIT)
+		inode_dio_wait(VFS_I(ip));
 
 	trace_xfs_file_direct_write(iocb, from);
 	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
-			   &xfs_dio_write_ops, IOMAP_DIO_FORCE_WAIT);
+			   &xfs_dio_write_ops, flags);
+
+	/*
+	 * Retry unaligned I/O with exclusive blocking semantics if the DIO
+	 * layer rejected it for mapping or locking reasons. If we are doing
+	 * nonblocking user I/O, propagate the error.
+	 */
+	if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
+		ASSERT(flags & IOMAP_DIO_OVERWRITE_ONLY);
+		xfs_iunlock(ip, iolock);
+		goto retry_exclusive;
+	}
+
 out_unlock:
 	if (iolock)
 		xfs_iunlock(ip, iolock);
author	Dave Chinner <dchinner@redhat.com>	2021-01-23 10:06:31 -0800
committer	Darrick J. Wong <djwong@kernel.org>	2021-02-01 09:47:19 -0800
commit	ed1128c2d0c87e5ff49c40f5529f06bc35f4251b (patch)
tree	599ec5d4c13fc28a9cdec1254605c7629c315681 /fs/xfs/xfs_file.c
parent	caa89dbc4303d9bc7caa6c1124a84aa3efa47e00 (diff)