diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 12:13:31 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 12:13:31 +0200 |
commit | 35219bc5c71f4197c8bd10297597de797c1eece5 (patch) | |
tree | 2448156135b78f54cd341a8457ccd84a371ddac7 /fs/ceph | |
parent | 9020d0d844ad58a051f90b1e5b82ba34123925b9 (diff) | |
parent | 4b40d43d9f951d87ae8dc414c2ef5ae50303a266 (diff) |
Merge tag 'vfs-6.12.netfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull netfs updates from Christian Brauner:
"This contains the work to improve read/write performance for the new
netfs library.
The main performance enhancing changes are:
- Define a structure, struct folio_queue, and a new iterator type,
ITER_FOLIOQ, to hold a buffer as a replacement for ITER_XARRAY. See
that patch for questions about naming and form.
ITER_FOLIOQ is provided as a replacement for ITER_XARRAY. The
problem with an xarray is that accessing it requires the use of a
lock (typically the RCU read lock) - and this means that we can't
supply iterate_and_advance() with a step function that might sleep
(crypto for example) without having to drop the lock between pages.
ITER_FOLIOQ is the iterator for a chain of folio_queue structs,
where each folio_queue holds a small list of folios. A folio_queue
struct is a simpler structure than xarray and is not subject to
concurrent manipulation by the VM. folio_queue is used rather than
a bvec[] as it can form lists of indefinite size, adding to one end
and removing from the other on the fly.
- Provide a copy_folio_from_iter() wrapper.
- Make cifs RDMA support ITER_FOLIOQ.
- Use folio queues in the write-side helpers instead of xarrays.
- Add a function to reset the iterator in a subrequest.
- Simplify the write-side helpers to use sheaves to skip gaps rather
than trying to work out where gaps are.
- In afs, make the read subrequests asynchronous, putting them into
work items to allow the next patch to do progressive
unlocking/reading.
- Overhaul the read-side helpers to improve performance.
- Fix the caching of a partial block at the end of a file.
- Allow a store to be cancelled.
Then some changes for cifs to make it use folio queues instead of
xarrays for crypto bufferage:
- Use raw iteration functions rather than manually coding iteration
when hashing data.
- Switch to using folio_queue for crypto buffers.
- Remove the xarray bits.
Make some adjustments to the /proc/fs/netfs/stats file such that:
- All the netfs stats lines begin 'Netfs:' but change this to
something a bit more useful.
- Add a couple of stats counters to track the numbers of skips and
waits on the per-inode writeback serialisation lock to make it
easier to check for this as a source of performance loss.
Miscellaneous work:
- Ensure that the sb_writers lock is taken around
vfs_{set,remove}xattr() in the cachefiles code.
- Reduce the number of conditional branches in netfs_perform_write().
- Move the CIFS_INO_MODIFIED_ATTR flag to the netfs_inode struct and
remove cifs_post_modify().
- Move the max_len/max_nr_segs members from netfs_io_subrequest to
netfs_io_request as they're only needed for one subreq at a time.
- Add an 'unknown' source value for tracing purposes.
- Remove NETFS_COPY_TO_CACHE as it's no longer used.
- Set the request work function up front at allocation time.
- Use bh-disabling spinlocks for rreq->lock as cachefiles completion
may be run from block-filesystem DIO completion in softirq context.
- Remove fs/netfs/io.c"
* tag 'vfs-6.12.netfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (25 commits)
docs: filesystems: corrected grammar of netfs page
cifs: Don't support ITER_XARRAY
cifs: Switch crypto buffer to use a folio_queue rather than an xarray
cifs: Use iterate_and_advance*() routines directly for hashing
netfs: Cancel dirty folios that have no storage destination
cachefiles, netfs: Fix write to partial block at EOF
netfs: Remove fs/netfs/io.c
netfs: Speed up buffered reading
afs: Make read subreqs async
netfs: Simplify the writeback code
netfs: Provide an iterator-reset function
netfs: Use new folio_queue data type and iterator instead of xarray iter
cifs: Provide the capability to extract from ITER_FOLIOQ to RDMA SGEs
iov_iter: Provide copy_folio_from_iter()
mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios
netfs: Use bh-disabling spinlocks for rreq->lock
netfs: Set the request work function upon allocation
netfs: Remove NETFS_COPY_TO_CACHE
netfs: Reserve netfs_sreq_source 0 as unset/unknown
netfs: Move max_len/max_nr_segs from netfs_io_subrequest to netfs_io_stream
...
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/addr.c | 76 |
1 files changed, 46 insertions, 30 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a5f848c167fa..5d9ccda098cc 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -13,6 +13,7 @@ #include <linux/iversion.h> #include <linux/ktime.h> #include <linux/netfs.h> +#include <trace/events/netfs.h> #include "super.h" #include "mds_client.h" @@ -205,21 +206,6 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) } } -static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) -{ - struct inode *inode = subreq->rreq->inode; - struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); - struct ceph_inode_info *ci = ceph_inode(inode); - u64 objno, objoff; - u32 xlen; - - /* Truncate the extent at the end of the current block */ - ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len, - &objno, &objoff, &xlen); - subreq->len = min(xlen, fsc->mount_options->rsize); - return true; -} - static void finish_netfs_read(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; @@ -264,7 +250,12 @@ static void finish_netfs_read(struct ceph_osd_request *req) calc_pages_for(osd_data->alignment, osd_data->length), false); } - netfs_subreq_terminated(subreq, err, false); + if (err > 0) { + subreq->transferred = err; + err = 0; + } + trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress); + netfs_read_subreq_terminated(subreq, err, false); iput(req->r_inode); ceph_dec_osd_stopping_blocker(fsc->mdsc); } @@ -278,7 +269,6 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) struct ceph_mds_request *req; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct ceph_inode_info *ci = ceph_inode(inode); - struct iov_iter iter; ssize_t err = 0; size_t len; int mode; @@ -301,6 +291,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); req->r_num_caps = 2; + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); err = ceph_mdsc_do_request(mdsc, NULL, req); if (err < 0) goto out; @@ -314,17 +305,36 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) } len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len); - iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); - err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter); - if (err == 0) + err = copy_to_iter(iinfo->inline_data + subreq->start, len, &subreq->io_iter); + if (err == 0) { err = -EFAULT; + } else { + subreq->transferred += err; + err = 0; + } ceph_mdsc_put_request(req); out: - netfs_subreq_terminated(subreq, err, false); + netfs_read_subreq_terminated(subreq, err, false); return true; } +static int ceph_netfs_prepare_read(struct netfs_io_subrequest *subreq) +{ + struct netfs_io_request *rreq = subreq->rreq; + struct inode *inode = rreq->inode; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + u64 objno, objoff; + u32 xlen; + + /* Truncate the extent at the end of the current block */ + ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len, + &objno, &objoff, &xlen); + rreq->io_streams[0].sreq_max_len = umin(xlen, fsc->mount_options->rsize); + return 0; +} + static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; @@ -334,9 +344,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_vino vino = ceph_vino(inode); - struct iov_iter iter; - int err = 0; - u64 len = subreq->len; + int err; + u64 len; bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 off = subreq->start; int extent_cnt; @@ -349,6 +358,12 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq)) return; + // TODO: This rounding here is slightly dodgy. It *should* work, for + // now, as the cache only deals in blocks that are a multiple of + // PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE. What needs to + // happen is for the fscrypt driving to be moved into netfslib and the + // data in the cache also to be stored encrypted. + len = subreq->len; ceph_fscrypt_adjust_off_and_len(inode, &off, &len); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, @@ -371,8 +386,6 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n", ceph_vinop(inode), subreq->start, subreq->len, len); - iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); - /* * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for * encrypted inodes. We'd need infrastructure that handles an iov_iter @@ -384,7 +397,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct page **pages; size_t page_off; - err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); + err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off); if (err < 0) { doutc(cl, "%llx.%llx failed to allocate pages, %d\n", ceph_vinop(inode), err); @@ -399,7 +412,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false); } else { - osd_req_op_extent_osd_iter(req, 0, &iter); + osd_req_op_extent_osd_iter(req, 0, &subreq->io_iter); } if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) { err = -EIO; @@ -410,17 +423,19 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) req->r_inode = inode; ihold(inode); + trace_netfs_sreq(subreq, netfs_sreq_trace_submit); ceph_osdc_start_request(req->r_osdc, req); out: ceph_osdc_put_request(req); if (err) - netfs_subreq_terminated(subreq, err, false); + netfs_read_subreq_terminated(subreq, err, false); doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err); } static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) { struct inode *inode = rreq->inode; + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = ceph_inode_to_client(inode); int got = 0, want = CEPH_CAP_FILE_CACHE; struct ceph_netfs_request_data *priv; @@ -472,6 +487,7 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) priv->caps = got; rreq->netfs_priv = priv; + rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize; out: if (ret < 0) @@ -496,9 +512,9 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq) const struct netfs_request_ops ceph_netfs_ops = { .init_request = ceph_init_request, .free_request = ceph_netfs_free_request, + .prepare_read = ceph_netfs_prepare_read, .issue_read = ceph_netfs_issue_read, .expand_readahead = ceph_netfs_expand_readahead, - .clamp_length = ceph_netfs_clamp_length, .check_write_begin = ceph_netfs_check_write_begin, }; |