From 49b29a573da83b65d5f4ecf2db6619bab7aa910c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 May 2024 18:36:09 +0200 Subject: nfs: add support for large folios NFS already is void of folio size assumption, so just pass the chunk size to __filemap_get_folio and set the large folio address_space flag for all regular files. Signed-off-by: Christoph Hellwig Tested-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 4 +++- fs/nfs/inode.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6bd127e6683d..7f1295475a90 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -339,6 +339,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata) { + fgf_t fgp = FGP_WRITEBEGIN; struct folio *folio; int once_thru = 0; int ret; @@ -346,8 +347,9 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", file, mapping->host->i_ino, len, (long long) pos); + fgp |= fgf_set_order(len); start: - folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN, + folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return PTR_ERR(folio); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index acef52ecb1bb..6d185af4cb29 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -491,6 +491,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; nfs_inode_init_regular(nfsi); + mapping_set_large_folios(inode->i_mapping); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; -- cgit v1.2.3 From 53185f2df0ea0d2d2d0922ab91055e958efcda41 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 31 May 2024 01:00:33 +0100 Subject: NFS: remove unused struct 'mnt_fhstatus' 'mnt_fhstatus' has been unused since commit 065015e5efff ("NFS: Remove unused XDR decoder functions"). Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/mount_clnt.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 68e76b626371..57c9dd700b58 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -128,11 +128,6 @@ struct mountres { rpc_authflavor_t *auth_flavors; }; -struct mnt_fhstatus { - u32 status; - struct nfs_fh *fh; -}; - /** * nfs_mount - Obtain an NFS file handle for the given host and path * @info: pointer to mount request arguments -- cgit v1.2.3 From d3318990193df72260447d9de064014c0ce74ebc Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 25 Jun 2024 09:42:49 -0700 Subject: fs: nfs: add missing MODULE_DESCRIPTION() macros Fix the 'make W=1' warnings: WARNING: modpost: missing MODULE_DESCRIPTION() in fs/nfs_common/nfs_acl.o WARNING: modpost: missing MODULE_DESCRIPTION() in fs/nfs_common/grace.o WARNING: modpost: missing MODULE_DESCRIPTION() in fs/nfs/nfs.o WARNING: modpost: missing MODULE_DESCRIPTION() in fs/nfs/nfsv2.o WARNING: modpost: missing MODULE_DESCRIPTION() in fs/nfs/nfsv3.o WARNING: modpost: missing MODULE_DESCRIPTION() in fs/nfs/nfsv4.o Signed-off-by: Jeff Johnson Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 1 + fs/nfs/nfs2super.c | 1 + fs/nfs/nfs3super.c | 1 + fs/nfs/nfs4super.c | 1 + fs/nfs_common/grace.c | 1 + fs/nfs_common/nfsacl.c | 1 + 6 files changed, 6 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6d185af4cb29..d81c5bcc64e3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2539,6 +2539,7 @@ static void __exit exit_nfs_fs(void) /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch "); +MODULE_DESCRIPTION("NFS client support"); MODULE_LICENSE("GPL"); module_param(enable_ino64, bool, 0644); diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c index 467f21ee6a35..b1badc70bd71 100644 --- a/fs/nfs/nfs2super.c +++ b/fs/nfs/nfs2super.c @@ -26,6 +26,7 @@ static void __exit exit_nfs_v2(void) unregister_nfs_version(&nfs_v2); } +MODULE_DESCRIPTION("NFSv2 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v2); diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index 8a9be9e47f76..20a80478449e 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -27,6 +27,7 @@ static void __exit exit_nfs_v3(void) unregister_nfs_version(&nfs_v3); } +MODULE_DESCRIPTION("NFSv3 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v3); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 8da5a9c000f4..b29a26923ce0 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -332,6 +332,7 @@ static void __exit exit_nfs_v4(void) nfs_dns_resolver_destroy(); } +MODULE_DESCRIPTION("NFSv4 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v4); diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 1479583fbb62..27cd0d13143b 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -139,6 +139,7 @@ exit_grace(void) } MODULE_AUTHOR("Jeff Layton "); +MODULE_DESCRIPTION("NFS client and server infrastructure"); MODULE_LICENSE("GPL"); module_init(init_grace) module_exit(exit_grace) diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 5a5bd85d08f8..ea382b75b26c 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -29,6 +29,7 @@ #include #include +MODULE_DESCRIPTION("NFS ACL support"); MODULE_LICENSE("GPL"); struct nfsacl_encode_desc { -- cgit v1.2.3 From 820620516993c151c88e9b59edc79ed12d1c31cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:19 -0400 Subject: NFSv4: Clean up open delegation return structure Instead of having the fields open coded in the struct nfs_openres, add a separate structure for them so that we can reuse that code for the WANT_DELEGATION case. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 30 ++++++++++++++++++------------ fs/nfs/nfs4xdr.c | 38 +++++++++++++++++++------------------- include/linux/nfs_xdr.h | 21 +++++++++++++++++---- 3 files changed, 54 insertions(+), 35 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a691fa10b3e9..7a74dc1bcfbd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1960,6 +1960,13 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) struct nfs_delegation *delegation; int delegation_flags = 0; + switch (data->o_res.delegation.open_delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + break; + default: + return; + }; rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation) @@ -1979,19 +1986,19 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) if ((delegation_flags & 1UL<inode, data->owner->so_cred, - data->o_res.delegation_type, - &data->o_res.delegation, - data->o_res.pagemod_limit); + data->o_res.delegation.type, + &data->o_res.delegation.stateid, + data->o_res.delegation.pagemod_limit); else nfs_inode_reclaim_delegation(state->inode, data->owner->so_cred, - data->o_res.delegation_type, - &data->o_res.delegation, - data->o_res.pagemod_limit); + data->o_res.delegation.type, + &data->o_res.delegation.stateid, + data->o_res.delegation.pagemod_limit); - if (data->o_res.do_recall) + if (data->o_res.delegation.do_recall) nfs_async_inode_return_delegation(state->inode, - &data->o_res.delegation); + &data->o_res.delegation.stateid); } /* @@ -2015,8 +2022,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (ret) return ERR_PTR(ret); - if (data->o_res.delegation_type != 0) - nfs4_opendata_check_deleg(data, state); + nfs4_opendata_check_deleg(data, state); if (!update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode)) @@ -2083,7 +2089,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) if (IS_ERR(state)) goto out; - if (data->o_res.delegation_type != 0) + if (data->o_res.delegation.type != 0) nfs4_opendata_check_deleg(data, state); if (!update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode)) { @@ -3111,7 +3117,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, case NFS4_OPEN_CLAIM_DELEGATE_PREV: if (!opendata->rpc_done) break; - if (opendata->o_res.delegation_type != 0) + if (opendata->o_res.delegation.type != 0) dir_verifier = nfs_save_change_attribute(dir); nfs_set_verifier(dentry, dir_verifier); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1416099dfcd1..119061da5298 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5148,13 +5148,12 @@ static int decode_space_limit(struct xdr_stream *xdr, } static int decode_rw_delegation(struct xdr_stream *xdr, - uint32_t delegation_type, - struct nfs_openres *res) + struct nfs4_open_delegation *res) { __be32 *p; int status; - status = decode_delegation_stateid(xdr, &res->delegation); + status = decode_delegation_stateid(xdr, &res->stateid); if (unlikely(status)) return status; p = xdr_inline_decode(xdr, 4); @@ -5162,52 +5161,53 @@ static int decode_rw_delegation(struct xdr_stream *xdr, return -EIO; res->do_recall = be32_to_cpup(p); - switch (delegation_type) { + switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_READ: - res->delegation_type = FMODE_READ; + res->type = FMODE_READ; break; case NFS4_OPEN_DELEGATE_WRITE: - res->delegation_type = FMODE_WRITE|FMODE_READ; + res->type = FMODE_WRITE|FMODE_READ; if (decode_space_limit(xdr, &res->pagemod_limit) < 0) return -EIO; } return decode_ace(xdr, NULL); } -static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_no_delegation(struct xdr_stream *xdr, + struct nfs4_open_delegation *res) { __be32 *p; - uint32_t why_no_delegation; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) return -EIO; - why_no_delegation = be32_to_cpup(p); - switch (why_no_delegation) { + res->why_no_delegation = be32_to_cpup(p); + switch (res->why_no_delegation) { case WND4_CONTENTION: case WND4_RESOURCE: - xdr_inline_decode(xdr, 4); - /* Ignore for now */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + res->will_notify = be32_to_cpup(p); } return 0; } -static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_delegation(struct xdr_stream *xdr, + struct nfs4_open_delegation *res) { __be32 *p; - uint32_t delegation_type; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) return -EIO; - delegation_type = be32_to_cpup(p); - res->delegation_type = 0; - switch (delegation_type) { + res->open_delegation_type = be32_to_cpup(p); + switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_NONE: return 0; case NFS4_OPEN_DELEGATE_READ: case NFS4_OPEN_DELEGATE_WRITE: - return decode_rw_delegation(xdr, delegation_type, res); + return decode_rw_delegation(xdr, res); case NFS4_OPEN_DELEGATE_NONE_EXT: return decode_no_delegation(xdr, res); } @@ -5248,7 +5248,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) for (; i < NFS4_BITMAP_SIZE; i++) res->attrset[i] = 0; - return decode_delegation(xdr, res); + return decode_delegation(xdr, &res->delegation); xdr_error: dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); return -EIO; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d09b9773b20c..682559e19d9d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -449,6 +449,22 @@ struct stateowner_id { __u32 uniquifier; }; +struct nfs4_open_delegation { + __u32 open_delegation_type; + union { + struct { + fmode_t type; + __u32 do_recall; + nfs4_stateid stateid; + unsigned long pagemod_limit; + }; + struct { + __u32 why_no_delegation; + __u32 will_notify; + }; + }; +}; + /* * Arguments to the open call. */ @@ -490,13 +506,10 @@ struct nfs_openres { struct nfs_fattr * f_attr; struct nfs_seqid * seqid; const struct nfs_server *server; - fmode_t delegation_type; - nfs4_stateid delegation; - unsigned long pagemod_limit; - __u32 do_recall; __u32 attrset[NFS4_BITMAP_SIZE]; struct nfs4_string *owner; struct nfs4_string *group_owner; + struct nfs4_open_delegation delegation; __u32 access_request; __u32 access_supported; __u32 access_result; -- cgit v1.2.3 From 7cca0e962eb50b5321317a491d9fe6578ca8419f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:20 -0400 Subject: NFSv4: Refactor nfs4_opendata_check_deleg() Modify it to no longer depend directly on the struct opendata. This will enable sharing with WANT_DELEGATION. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 66 +++++++++++++++++++++++++------------------------------ 1 file changed, 30 insertions(+), 36 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7a74dc1bcfbd..7f294085e887 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1954,51 +1954,39 @@ out_return_state: } static void -nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) +nfs4_process_delegation(struct inode *inode, const struct cred *cred, + enum open_claim_type4 claim, + const struct nfs4_open_delegation *delegation) { - struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client; - struct nfs_delegation *delegation; - int delegation_flags = 0; - - switch (data->o_res.delegation.open_delegation_type) { + switch (delegation->open_delegation_type) { case NFS4_OPEN_DELEGATE_READ: case NFS4_OPEN_DELEGATE_WRITE: break; default: return; - }; - rcu_read_lock(); - delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation) - delegation_flags = delegation->flags; - rcu_read_unlock(); - switch (data->o_arg.claim) { - default: - break; + } + switch (claim) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: pr_err_ratelimited("NFS: Broken NFSv4 server %s is " "returning a delegation for " "OPEN(CLAIM_DELEGATE_CUR)\n", - clp->cl_hostname); - return; + NFS_SERVER(inode)->nfs_client->cl_hostname); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + nfs_inode_reclaim_delegation(inode, cred, + delegation->type, + &delegation->stateid, + delegation->pagemod_limit); + break; + default: + nfs_inode_set_delegation(inode, cred, + delegation->type, + &delegation->stateid, + delegation->pagemod_limit); } - if ((delegation_flags & 1UL<inode, - data->owner->so_cred, - data->o_res.delegation.type, - &data->o_res.delegation.stateid, - data->o_res.delegation.pagemod_limit); - else - nfs_inode_reclaim_delegation(state->inode, - data->owner->so_cred, - data->o_res.delegation.type, - &data->o_res.delegation.stateid, - data->o_res.delegation.pagemod_limit); - - if (data->o_res.delegation.do_recall) - nfs_async_inode_return_delegation(state->inode, - &data->o_res.delegation.stateid); + if (delegation->do_recall) + nfs_async_inode_return_delegation(inode, &delegation->stateid); } /* @@ -2022,7 +2010,10 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (ret) return ERR_PTR(ret); - nfs4_opendata_check_deleg(data, state); + nfs4_process_delegation(state->inode, + data->owner->so_cred, + data->o_arg.claim, + &data->o_res.delegation); if (!update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode)) @@ -2089,8 +2080,11 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) if (IS_ERR(state)) goto out; - if (data->o_res.delegation.type != 0) - nfs4_opendata_check_deleg(data, state); + nfs4_process_delegation(state->inode, + data->owner->so_cred, + data->o_arg.claim, + &data->o_res.delegation); + if (!update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode)) { nfs4_put_open_state(state); -- cgit v1.2.3 From 6a68aed602d7b770552c7f890d0c30c53725c7b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:21 -0400 Subject: NFSv4: Add new attribute delegation definitions Add the attribute delegation XDR definitions from the spec. Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- include/linux/nfs4.h | 9 +++++++++ include/uapi/linux/nfs4.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f294085e887..90df37f3866a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3852,7 +3852,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_TIME_DELEG_MODIFY - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 0d896ce296ce..c074e0ac390f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -367,6 +367,8 @@ enum open_delegation_type4 { NFS4_OPEN_DELEGATE_READ = 1, NFS4_OPEN_DELEGATE_WRITE = 2, NFS4_OPEN_DELEGATE_NONE_EXT = 3, /* 4.1 */ + NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG = 4, + NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5, }; enum why_no_delegation4 { /* new to v4.1 */ @@ -507,6 +509,11 @@ enum { FATTR4_XATTR_SUPPORT = 82, }; +enum { + FATTR4_TIME_DELEG_ACCESS = 84, + FATTR4_TIME_DELEG_MODIFY = 85, +}; + /* * The following internal definitions enable processing the above * attribute bits within 32-bit word boundaries. @@ -586,6 +593,8 @@ enum { #define FATTR4_WORD2_SECURITY_LABEL BIT(FATTR4_SEC_LABEL - 64) #define FATTR4_WORD2_MODE_UMASK BIT(FATTR4_MODE_UMASK - 64) #define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) +#define FATTR4_WORD2_TIME_DELEG_ACCESS BIT(FATTR4_TIME_DELEG_ACCESS - 64) +#define FATTR4_WORD2_TIME_DELEG_MODIFY BIT(FATTR4_TIME_DELEG_MODIFY - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 1d2043708bf1..afd7e32906c3 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -69,6 +69,8 @@ #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 +#define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 + #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 #define NFS4_CDFC4_BOTH 0x3 -- cgit v1.2.3 From 90f9ae74422d7e7447cb0ea21e1227142b58c52a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:22 -0400 Subject: NFSv4: Plumb in XDR support for the new delegation-only setattr op We want to send the updated atime and mtime as part of the delegreturn compound. Add a special structure to hold those variables. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 25 ++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_xdr.h | 10 ++++++++++ 3 files changed, 86 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 90df37f3866a..af0758210162 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6575,6 +6575,7 @@ struct nfs4_delegreturndata { u32 roc_barrier; bool roc; } lr; + struct nfs4_delegattr sattr; struct nfs_fattr fattr; int rpc_status; struct inode *inode; @@ -6599,6 +6600,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) &data->res.lr_ret) == -EAGAIN) goto out_restart; + if (data->args.sattr_args && task->tk_status != 0) { + switch(data->res.sattr_ret) { + case 0: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + /* Let the main handler below do stateid recovery */ + break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_delegation_stateid(&data->stateid, + data->inode)) + goto out_restart; + fallthrough; + default: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + goto out_restart; + } + } + switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 119061da5298..4c22b865b9c9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -224,6 +224,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_attrs_maxsz) #define decode_setattr_maxsz (op_decode_hdr_maxsz + \ nfs4_fattr_bitmap_maxsz) +#define encode_delegattr_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + nfs4_fattr_bitmap_maxsz + \ + 2*nfstime4_maxsz) +#define decode_delegattr_maxsz (decode_setattr_maxsz) #define encode_read_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) #define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz) @@ -758,12 +763,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_layoutreturn_maxsz + \ + encode_delegattr_maxsz + \ encode_delegreturn_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutreturn_maxsz + \ + decode_delegattr_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ @@ -1735,6 +1742,33 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs server->attr_bitmask); } +static void encode_delegattr(struct xdr_stream *xdr, + const nfs4_stateid *stateid, + const struct nfs4_delegattr *attr, + struct compound_hdr *hdr) +{ + uint32_t bitmap[3] = { 0 }; + uint32_t len = 0; + __be32 *p; + + encode_op_hdr(xdr, OP_SETATTR, encode_delegattr_maxsz, hdr); + encode_nfs4_stateid(xdr, stateid); + if (attr->atime_set) { + bitmap[2] |= FATTR4_WORD2_TIME_DELEG_ACCESS; + len += (nfstime4_maxsz << 2); + } + if (attr->mtime_set) { + bitmap[2] |= FATTR4_WORD2_TIME_DELEG_MODIFY; + len += (nfstime4_maxsz << 2); + } + xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap)); + xdr_stream_encode_opaque_inline(xdr, (void **)&p, len); + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) + p = xdr_encode_nfstime4(p, &attr->atime); + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) + p = xdr_encode_nfstime4(p, &attr->mtime); +} + static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) { __be32 *p; @@ -2812,6 +2846,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_putfh(xdr, args->fhandle, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); + if (args->sattr_args) + encode_delegattr(xdr, args->stateid, args->sattr_args, &hdr); if (args->bitmask) encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); @@ -5163,9 +5199,11 @@ static int decode_rw_delegation(struct xdr_stream *xdr, switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: res->type = FMODE_READ; break; case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: res->type = FMODE_WRITE|FMODE_READ; if (decode_space_limit(xdr, &res->pagemod_limit) < 0) return -EIO; @@ -5207,6 +5245,8 @@ static int decode_delegation(struct xdr_stream *xdr, return 0; case NFS4_OPEN_DELEGATE_READ: case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: return decode_rw_delegation(xdr, res); case NFS4_OPEN_DELEGATE_NONE_EXT: return decode_no_delegation(xdr, res); @@ -5480,6 +5520,11 @@ static int decode_setattr(struct xdr_stream *xdr) return -EIO; } +static int decode_delegattr(struct xdr_stream *xdr) +{ + return decode_setattr(xdr); +} + static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res) { __be32 *p; @@ -7052,6 +7097,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, if (status) goto out; } + if (res->sattr_res) { + status = decode_delegattr(xdr); + res->sattr_ret = status; + if (status) + goto out; + } if (res->fattr) { status = decode_getfattr(xdr, res->fattr, res->server); if (status != 0) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 682559e19d9d..f40be64ce942 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -622,6 +622,13 @@ struct nfs_release_lockowner_res { struct nfs4_sequence_res seq_res; }; +struct nfs4_delegattr { + struct timespec64 atime; + struct timespec64 mtime; + bool atime_set; + bool mtime_set; +}; + struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; @@ -629,6 +636,7 @@ struct nfs4_delegreturnargs { const u32 *bitmask; u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; + struct nfs4_delegattr *sattr_args; }; struct nfs4_delegreturnres { @@ -637,6 +645,8 @@ struct nfs4_delegreturnres { struct nfs_server *server; struct nfs4_layoutreturn_res *lr_res; int lr_ret; + bool sattr_res; + int sattr_ret; }; /* -- cgit v1.2.3 From 43df7110f4a90dbdb4be1bc71ca71e4f6a03a11f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:23 -0400 Subject: NFSv4: Add CB_GETATTR support for delegated attributes When the client holds an attribute delegation, the server may retrieve all the timestamps through a CB_GETATTR callback. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback.h | 5 +++-- fs/nfs/callback_proc.c | 14 +++++++++----- fs/nfs/callback_xdr.c | 39 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 650758ee0d5f..154a6ed1299f 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -46,14 +46,15 @@ struct cb_compound_hdr_res { struct cb_getattrargs { struct nfs_fh fh; - uint32_t bitmap[2]; + uint32_t bitmap[3]; }; struct cb_getattrres { __be32 status; - uint32_t bitmap[2]; + uint32_t bitmap[3]; uint64_t size; uint64_t change_attr; + struct timespec64 atime; struct timespec64 ctime; struct timespec64 mtime; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 76cea34477ae..199c52788640 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -37,7 +37,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ goto out; - res->bitmap[0] = res->bitmap[1] = 0; + memset(res->bitmap, 0, sizeof(res->bitmap)); res->status = htonl(NFS4ERR_BADHANDLE); dprintk_rcu("NFS: GETATTR callback request from %s\n", @@ -59,12 +59,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, res->change_attr = delegation->change_attr; if (nfs_have_writebacks(inode)) res->change_attr++; + res->atime = inode_get_atime(inode); res->ctime = inode_get_ctime(inode); res->mtime = inode_get_mtime(inode); - res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & - args->bitmap[0]; - res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & - args->bitmap[1]; + res->bitmap[0] = (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE) & + args->bitmap[0]; + res->bitmap[1] = (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_METADATA | + FATTR4_WORD1_TIME_MODIFY) & args->bitmap[1]; + res->bitmap[2] = (FATTR4_WORD2_TIME_DELEG_ACCESS | + FATTR4_WORD2_TIME_DELEG_MODIFY) & args->bitmap[2]; res->status = 0; out_iput: rcu_read_unlock(); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 9369488f2ed4..29c49a7e5fe1 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -25,8 +25,9 @@ #define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps #define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ CB_OP_GETATTR_BITMAP_MAXSZ + \ - /* change, size, ctime, mtime */\ - (2 + 2 + 3 + 3) * 4) + /* change, size, atime, ctime, + * mtime, deleg_atime, deleg_mtime */\ + (2 + 2 + 3 + 3 + 3 + 3 + 3) * 4) #define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #if defined(CONFIG_NFS_V4_1) @@ -635,6 +636,13 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 * return 0; } +static __be32 encode_attr_atime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) + return 0; + return encode_attr_time(xdr,time); +} + static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) { if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) @@ -649,6 +657,24 @@ static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, return encode_attr_time(xdr,time); } +static __be32 encode_attr_delegatime(struct xdr_stream *xdr, + const uint32_t *bitmap, + const struct timespec64 *time) +{ + if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)) + return 0; + return encode_attr_time(xdr,time); +} + +static __be32 encode_attr_delegmtime(struct xdr_stream *xdr, + const uint32_t *bitmap, + const struct timespec64 *time) +{ + if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)) + return 0; + return encode_attr_time(xdr,time); +} + static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr) { __be32 status; @@ -697,12 +723,21 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (unlikely(status != 0)) goto out; status = encode_attr_size(xdr, res->bitmap, res->size); + if (unlikely(status != 0)) + goto out; + status = encode_attr_atime(xdr, res->bitmap, &res->atime); if (unlikely(status != 0)) goto out; status = encode_attr_ctime(xdr, res->bitmap, &res->ctime); if (unlikely(status != 0)) goto out; status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_delegatime(xdr, res->bitmap, &res->atime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_delegmtime(xdr, res->bitmap, &res->mtime); *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); out: return status; -- cgit v1.2.3 From 4201916f2ab13577d45876f4bc784be55e4a83da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:24 -0400 Subject: NFSv4: Add a flags argument to the 'have_delegation' callback This argument will be used to allow the caller to specify whether or not they need to know that this is an attribute delegation. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 26 +++++++++++++------------- fs/nfs/delegation.h | 16 +++++++++++++--- fs/nfs/dir.c | 2 +- fs/nfs/file.c | 4 ++-- fs/nfs/inode.c | 7 +++---- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 14 +++++++------- fs/nfs/proc.c | 2 +- fs/nfs/write.c | 2 +- include/linux/nfs_xdr.h | 2 +- 10 files changed, 43 insertions(+), 34 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6bace5fece04..6fdffd25cb2b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -82,11 +82,10 @@ static void nfs_mark_return_delegation(struct nfs_server *server, set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } -static bool -nfs4_is_valid_delegation(const struct nfs_delegation *delegation, - fmode_t flags) +static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation, + fmode_t type) { - if (delegation != NULL && (delegation->type & flags) == flags && + if (delegation != NULL && (delegation->type & type) == type && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) return true; @@ -103,16 +102,16 @@ struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode) return NULL; } -static int -nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) +static int nfs4_do_check_delegation(struct inode *inode, fmode_t type, + int flags, bool mark) { struct nfs_delegation *delegation; int ret = 0; - flags &= FMODE_READ|FMODE_WRITE; + type &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (nfs4_is_valid_delegation(delegation, flags)) { + if (nfs4_is_valid_delegation(delegation, type)) { if (mark) nfs_mark_delegation_referenced(delegation); ret = 1; @@ -124,22 +123,23 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) * nfs4_have_delegation - check if inode has a delegation, mark it * NFS_DELEGATION_REFERENCED if there is one. * @inode: inode to check - * @flags: delegation types to check for + * @type: delegation types to check for + * @flags: various modifiers * * Returns one if inode has the indicated delegation, otherwise zero. */ -int nfs4_have_delegation(struct inode *inode, fmode_t flags) +int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags) { - return nfs4_do_check_delegation(inode, flags, true); + return nfs4_do_check_delegation(inode, type, flags, true); } /* * nfs4_check_delegation - check if inode has a delegation, do not mark * NFS_DELEGATION_REFERENCED if it has one. */ -int nfs4_check_delegation(struct inode *inode, fmode_t flags) +int nfs4_check_delegation(struct inode *inode, fmode_t type) { - return nfs4_do_check_delegation(inode, flags, false); + return nfs4_do_check_delegation(inode, type, 0, false); } static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index a6f495d012cf..257b3d726043 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -75,8 +75,8 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); -int nfs4_have_delegation(struct inode *inode, fmode_t flags); -int nfs4_check_delegation(struct inode *inode, fmode_t flags); +int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags); +int nfs4_check_delegation(struct inode *inode, fmode_t type); bool nfs4_delegation_flush_on_close(const struct inode *inode); void nfs_inode_find_delegation_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); @@ -84,9 +84,19 @@ int nfs4_inode_make_writeable(struct inode *inode); #endif +static inline int nfs_have_read_or_write_delegation(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); +} + +static inline int nfs_have_write_delegation(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, 0); +} + static inline int nfs_have_delegated_attributes(struct inode *inode) { - return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); } #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 07a7be27182e..4cb97ef41350 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1437,7 +1437,7 @@ static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) if (!dir || !nfs_verify_change_attribute(dir, verf)) return; - if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0)) nfs_set_verifier_delegated(&verf); dentry->d_time = verf; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7f1295475a90..834e612262e6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -732,7 +732,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) } fl->c.flc_type = saved_type; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) goto out_noconflict; if (is_local) @@ -815,7 +815,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { + if (!nfs_have_read_or_write_delegation(inode)) { nfs_zap_caches(inode); if (mapping_mapped(filp->f_mapping)) nfs_revalidate_mapping(inode, filp->f_mapping); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d81c5bcc64e3..f1bfe453aa84 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -190,9 +190,8 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); - bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); - if (have_delegation) { + if (nfs_have_delegated_attributes(inode)) { if (!(flags & NFS_INO_REVAL_FORCED)) flags &= ~(NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER | @@ -1013,7 +1012,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) @@ -1483,7 +1482,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat unsigned long invalid = 0; struct timespec64 ts; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_delegated_attributes(inode)) return 0; if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 74bda639a7cf..cab6c73d25d6 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -979,7 +979,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return status; } -static int nfs3_have_delegation(struct inode *inode, fmode_t flags) +static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags) { return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af0758210162..4455ee510c2f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -293,7 +293,7 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, unsigned long cache_validity; memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst)); - if (!inode || !nfs4_have_delegation(inode, FMODE_READ)) + if (!inode || !nfs_have_read_or_write_delegation(inode)) return; cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags; @@ -1264,7 +1264,7 @@ nfs4_update_changeattr_locked(struct inode *inode, if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (!nfs_have_delegated_attributes(inode)) cache_validity |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | @@ -3700,7 +3700,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ - if (!nfs4_have_delegation(inode, FMODE_READ)) { + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { nfs4_bitmask_set(calldata->arg.bitmask_store, server->cache_consistency_bitmask, inode, 0); @@ -4638,7 +4638,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry }; int status = 0; - if (!nfs4_have_delegation(inode, FMODE_READ)) { + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; @@ -5607,7 +5607,7 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) /* Otherwise, request attributes if and only if we don't hold * a delegation */ - return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; + return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0; } void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], @@ -7654,10 +7654,10 @@ static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease, int ret; /* No delegation, no lease */ - if (!nfs4_have_delegation(inode, type)) + if (!nfs4_have_delegation(inode, type, 0)) return -EAGAIN; ret = generic_setlease(file, arg, lease, priv); - if (ret || nfs4_have_delegation(inode, type)) + if (ret || nfs4_have_delegation(inode, type, 0)) return ret; /* We raced with a delegation return */ nfs4_delete_lease(file, priv); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index d105e5b2659d..995cc42b0fa0 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -687,7 +687,7 @@ out_einval: return -EINVAL; } -static int nfs_have_delegation(struct inode *inode, fmode_t flags) +static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags) { return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2329cbb0e446..be19ac3110d1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1320,7 +1320,7 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio, return 0; if (!nfs_folio_write_uptodate(folio, pagelen)) return 0; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (nfs_have_write_delegation(inode)) return 1; if (!flctx || (list_empty_careful(&flctx->flc_flock) && list_empty_careful(&flctx->flc_posix))) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f40be64ce942..51611583af51 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1830,7 +1830,7 @@ struct nfs_rpc_ops { int open_flags, struct iattr *iattr, int *); - int (*have_delegation)(struct inode *, fmode_t); + int (*have_delegation)(struct inode *, fmode_t, int); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); -- cgit v1.2.3 From e12912d94137ab36ee704a91f465ff15c8b423da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:25 -0400 Subject: NFSv4: Add support for delegated atime and mtime attributes Ensure that we update the mtime and atime correctly when we read or write data to the file and when we truncate. Let the server manage ctime on other attribute updates. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 32 ++++++++++++++++++++++++++----- fs/nfs/delegation.h | 25 +++++++++++++++++++++++-- fs/nfs/inode.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++---- fs/nfs/nfs4proc.c | 21 ++++++++++++--------- fs/nfs/read.c | 3 +++ fs/nfs/write.c | 9 +++++++++ 6 files changed, 124 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6fdffd25cb2b..d9117630e062 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -115,6 +115,9 @@ static int nfs4_do_check_delegation(struct inode *inode, fmode_t type, if (mark) nfs_mark_delegation_referenced(delegation); ret = 1; + if ((flags & NFS_DELEGATION_FLAG_TIME) && + !test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + ret = 0; } rcu_read_unlock(); return ret; @@ -221,11 +224,12 @@ again: * @type: delegation type * @stateid: delegation stateid * @pagemod_limit: write delegation "space_limit" + * @deleg_type: raw delegation type * */ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, - unsigned long pagemod_limit) + unsigned long pagemod_limit, u32 deleg_type) { struct nfs_delegation *delegation; const struct cred *oldcred = NULL; @@ -239,6 +243,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, delegation->pagemod_limit = pagemod_limit; oldcred = delegation->cred; delegation->cred = get_cred(cred); + switch (deleg_type) { + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + break; + default: + clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + } clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) @@ -250,7 +262,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, } else { rcu_read_unlock(); nfs_inode_set_delegation(inode, cred, type, stateid, - pagemod_limit); + pagemod_limit, deleg_type); } } @@ -418,13 +430,13 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, * @type: delegation type * @stateid: delegation stateid * @pagemod_limit: write delegation "space_limit" + * @deleg_type: raw delegation type * * Returns zero on success, or a negative errno value. */ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, - const nfs4_stateid *stateid, - unsigned long pagemod_limit) + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = server->nfs_client; @@ -444,6 +456,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, delegation->cred = get_cred(cred); delegation->inode = inode; delegation->flags = 1<flags |= BIT(NFS_DELEGATION_DELEGTIME); + } delegation->test_gen = 0; spin_lock_init(&delegation->lock); @@ -508,6 +525,11 @@ add_new: atomic_long_inc(&nfs_active_delegations); trace_nfs4_set_delegation(inode, type); + + /* If we hold writebacks and have delegated mtime then update */ + if (deleg_type == NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG && + nfs_have_writebacks(inode)) + nfs_update_delegated_mtime(inode); out: spin_unlock(&clp->cl_lock); if (delegation != NULL) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 257b3d726043..001551e2ab60 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -38,12 +38,15 @@ enum { NFS_DELEGATION_TEST_EXPIRED, NFS_DELEGATION_INODE_FREEING, NFS_DELEGATION_RETURN_DELAYED, + NFS_DELEGATION_DELEGTIME, }; int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type); void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type); int nfs4_inode_return_delegation(struct inode *inode); void nfs4_inode_return_delegation_on_close(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); @@ -84,6 +87,12 @@ int nfs4_inode_make_writeable(struct inode *inode); #endif +#define NFS_DELEGATION_FLAG_TIME BIT(1) + +void nfs_update_delegated_atime(struct inode *inode); +void nfs_update_delegated_mtime(struct inode *inode); +void nfs_update_delegated_mtime_locked(struct inode *inode); + static inline int nfs_have_read_or_write_delegation(struct inode *inode) { return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); @@ -99,4 +108,16 @@ static inline int nfs_have_delegated_attributes(struct inode *inode) return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); } +static inline int nfs_have_delegated_atime(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, + NFS_DELEGATION_FLAG_TIME); +} + +static inline int nfs_have_delegated_mtime(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, + NFS_DELEGATION_FLAG_TIME); +} + #endif diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f1bfe453aa84..6185e6e15820 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -275,6 +275,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { + if (nfs_have_delegated_atime(inode)) + return; spin_lock(&inode->i_lock); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); @@ -604,6 +606,33 @@ out_no_inode: } EXPORT_SYMBOL_GPL(nfs_fhget); +void nfs_update_delegated_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (nfs_have_delegated_atime(inode)) { + inode_update_timestamps(inode, S_ATIME); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ATIME; + } + spin_unlock(&inode->i_lock); +} + +void nfs_update_delegated_mtime_locked(struct inode *inode) +{ + if (nfs_have_delegated_mtime(inode)) { + inode_update_timestamps(inode, S_CTIME | S_MTIME); + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_CTIME | + NFS_INO_INVALID_MTIME); + } +} + +void nfs_update_delegated_mtime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_update_delegated_mtime_locked(inode); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime); + #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int @@ -631,6 +660,17 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, attr->ia_valid &= ~ATTR_SIZE; } + if (nfs_have_delegated_mtime(inode)) { + if (attr->ia_valid & ATTR_MTIME) { + nfs_update_delegated_mtime(inode); + attr->ia_valid &= ~ATTR_MTIME; + } + if (attr->ia_valid & ATTR_ATIME) { + nfs_update_delegated_atime(inode); + attr->ia_valid &= ~ATTR_ATIME; + } + } + /* Optimization: if the end result is no change, don't RPC */ if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; @@ -686,6 +726,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); + nfs_update_delegated_mtime_locked(inode); spin_lock(&inode->i_lock); out: return err; @@ -709,8 +750,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & ATTR_SIZE) != 0) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME | - NFS_INO_INVALID_BLOCKS); + if (!nfs_have_delegated_mtime(inode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } @@ -856,8 +898,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, /* Flush out writes to the server in order to update c/mtime/version. */ if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) && - S_ISREG(inode->i_mode)) - filemap_write_and_wait(inode->i_mapping); + S_ISREG(inode->i_mode)) { + if (nfs_have_delegated_mtime(inode)) + filemap_fdatawrite(inode->i_mapping); + else + filemap_write_and_wait(inode->i_mapping); + } /* * We may force a getattr if the user cares about atime. diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4455ee510c2f..f4215dcf3614 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1245,7 +1245,8 @@ nfs4_update_changeattr_locked(struct inode *inode, struct nfs_inode *nfsi = NFS_I(inode); u64 change_attr = inode_peek_iversion_raw(inode); - cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + if (!nfs_have_delegated_mtime(inode)) + cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; if (S_ISDIR(inode->i_mode)) cache_validity |= NFS_INO_INVALID_DATA; @@ -1961,6 +1962,8 @@ nfs4_process_delegation(struct inode *inode, const struct cred *cred, switch (delegation->open_delegation_type) { case NFS4_OPEN_DELEGATE_READ: case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: break; default: return; @@ -1974,16 +1977,16 @@ nfs4_process_delegation(struct inode *inode, const struct cred *cred, NFS_SERVER(inode)->nfs_client->cl_hostname); break; case NFS4_OPEN_CLAIM_PREVIOUS: - nfs_inode_reclaim_delegation(inode, cred, - delegation->type, - &delegation->stateid, - delegation->pagemod_limit); + nfs_inode_reclaim_delegation(inode, cred, delegation->type, + &delegation->stateid, + delegation->pagemod_limit, + delegation->open_delegation_type); break; default: - nfs_inode_set_delegation(inode, cred, - delegation->type, - &delegation->stateid, - delegation->pagemod_limit); + nfs_inode_set_delegation(inode, cred, delegation->type, + &delegation->stateid, + delegation->pagemod_limit, + delegation->open_delegation_type); } if (delegation->do_recall) nfs_async_inode_return_delegation(inode, &delegation->stateid); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a142287d86f6..1b0e06c11983 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -28,6 +28,7 @@ #include "fscache.h" #include "pnfs.h" #include "nfstrace.h" +#include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -372,6 +373,7 @@ int nfs_read_folio(struct file *file, struct folio *folio) goto out_put; nfs_pageio_complete_read(&pgio); + nfs_update_delegated_atime(inode); ret = pgio.pg_error < 0 ? pgio.pg_error : 0; if (!ret) { ret = folio_wait_locked_killable(folio); @@ -428,6 +430,7 @@ void nfs_readahead(struct readahead_control *ractl) } nfs_pageio_complete_read(&pgio); + nfs_update_delegated_atime(inode); put_nfs_open_context(ctx); out: diff --git a/fs/nfs/write.c b/fs/nfs/write.c index be19ac3110d1..f5414c96381a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -289,6 +289,8 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset, NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); out: + /* Atomically update timestamps if they are delegated to us. */ + nfs_update_delegated_mtime_locked(inode); spin_unlock(&inode->i_lock); nfs_fscache_invalidate(inode, 0); } @@ -1514,6 +1516,13 @@ void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) struct nfs_fattr *fattr = &hdr->fattr; struct inode *inode = hdr->inode; + if (nfs_have_delegated_mtime(inode)) { + spin_lock(&inode->i_lock); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); + spin_unlock(&inode->i_lock); + return; + } + spin_lock(&inode->i_lock); nfs_writeback_check_extend(hdr, fattr); nfs_post_op_update_inode_force_wcc_locked(inode, fattr); -- cgit v1.2.3 From 86e1c54d152e2799671e149d6e4d1c1a4a2be857 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:26 -0400 Subject: NFSv4: Add recovery of attribute delegations After a reboot of the NFSv4.2 server, the recovery code needs to specify whether the delegation to be recovered is an attribute delegation or not. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 +++++++++++++++--- fs/nfs/nfs4xdr.c | 18 ++++++++---------- include/linux/nfs_xdr.h | 2 +- 3 files changed, 24 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f4215dcf3614..34182a3c38a7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2225,7 +2225,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state { struct nfs_delegation *delegation; struct nfs4_opendata *opendata; - fmode_t delegation_type = 0; + u32 delegation_type = NFS4_OPEN_DELEGATE_NONE; int status; opendata = nfs4_open_recoverdata_alloc(ctx, state, @@ -2234,8 +2234,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state return PTR_ERR(opendata); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) - delegation_type = delegation->type; + if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) { + switch(delegation->type) { + case FMODE_READ: + delegation_type = NFS4_OPEN_DELEGATE_READ; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG; + break; + case FMODE_WRITE: + case FMODE_READ|FMODE_WRITE: + delegation_type = NFS4_OPEN_DELEGATE_WRITE; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG; + } + } rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4c22b865b9c9..e160a275ad4a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1475,20 +1475,18 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a } } -static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type) +static inline void encode_delegation_type(struct xdr_stream *xdr, u32 delegation_type) { __be32 *p; p = reserve_space(xdr, 4); switch (delegation_type) { - case 0: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE); - break; - case FMODE_READ: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ); - break; - case FMODE_WRITE|FMODE_READ: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE); + case NFS4_OPEN_DELEGATE_NONE: + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + *p = cpu_to_be32(delegation_type); break; default: BUG(); @@ -1504,7 +1502,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr * encode_string(xdr, name->len, name->name); } -static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type) +static inline void encode_claim_previous(struct xdr_stream *xdr, u32 type) { __be32 *p; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 51611583af51..d8cfa956d24c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -484,7 +484,7 @@ struct nfs_openargs { nfs4_verifier verifier; /* EXCLUSIVE */ }; nfs4_stateid delegation; /* CLAIM_DELEGATE_CUR */ - fmode_t delegation_type; /* CLAIM_PREVIOUS */ + __u32 delegation_type; /* CLAIM_PREVIOUS */ } u; const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ -- cgit v1.2.3 From dcb3c20f741993efbd95be78aab93fac29516323 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:27 -0400 Subject: NFSv4: Add a capability for delegated attributes Cache whether or not the server may have support for delegated attributes in a capability flag. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 ++ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 3 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 34182a3c38a7..03835c8a180f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3930,6 +3930,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f #endif if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS) server->caps |= NFS_CAP_FS_LOCATIONS; + if (res.attr_bitmask[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) + server->caps |= NFS_CAP_DELEGTIME; if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID)) server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID; if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE)) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 92de074e63b9..5a76a87cd924 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) -- cgit v1.2.3 From e3e9d44de13f91fae4565c7d6dfc7ec3eb03b108 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:28 -0400 Subject: NFSv4: Enable attribute delegations If we see that the server supports attribute delegations, then request them by setting the appropriate OPEN arguments. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 03835c8a180f..1209ce22158e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1339,8 +1339,13 @@ nfs4_map_atomic_open_share(struct nfs_server *server, if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) goto out; /* Want no delegation if we're using O_DIRECT */ - if (openflags & O_DIRECT) + if (openflags & O_DIRECT) { res |= NFS4_SHARE_WANT_NO_DELEG; + goto out; + } + /* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */ + if (server->caps & NFS_CAP_DELEGTIME) + res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS; out: return res; } -- cgit v1.2.3 From b81aca09617cd768d07601a0ac96135d00b82bfe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:29 -0400 Subject: NFSv4: Delegreturn must set m/atime when they are delegated If the atime or mtime attributes were delegated, then we need to propagate their new values back to the server when returning the delegation. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 9 +++++---- fs/nfs/delegation.h | 4 +++- fs/nfs/nfs4proc.c | 27 ++++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d9117630e062..d5edb3b3eeef 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -266,7 +266,9 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, } } -static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) +static int nfs_do_return_delegation(struct inode *inode, + struct nfs_delegation *delegation, + int issync) { const struct cred *cred; int res = 0; @@ -275,9 +277,8 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * spin_lock(&delegation->lock); cred = get_cred(delegation->cred); spin_unlock(&delegation->lock); - res = nfs4_proc_delegreturn(inode, cred, - &delegation->stateid, - issync); + res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid, + delegation, issync); put_cred(cred); } return res; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 001551e2ab60..71524d34ed20 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -70,7 +70,9 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp); void nfs_reap_expired_delegations(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ -int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync); +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1209ce22158e..88edeaf5b5d5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6739,7 +6739,10 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = { .rpc_release = nfs4_delegreturn_release, }; -static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync) +static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, + int issync) { struct nfs4_delegreturndata *data; struct nfs_server *server = NFS_SERVER(inode); @@ -6791,12 +6794,27 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, } } + if (delegation && + test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) { + if (delegation->type & FMODE_READ) { + data->sattr.atime = inode_get_atime(inode); + data->sattr.atime_set = true; + } + if (delegation->type & FMODE_WRITE) { + data->sattr.mtime = inode_get_mtime(inode); + data->sattr.mtime_set = true; + } + data->args.sattr_args = &data->sattr; + data->res.sattr_res = true; + } + if (!data->inode) nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 1); else nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); + task_setup_data.callback_data = data; msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; @@ -6814,13 +6832,16 @@ out: return status; } -int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync) +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, int issync) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); + err = _nfs4_proc_delegreturn(inode, cred, stateid, + delegation, issync); trace_nfs4_delegreturn(inode, stateid, err); switch (err) { case -NFS4ERR_STALE_STATEID: -- cgit v1.2.3 From 0a741f59c331254a2fc7eb57cfdd25e75c2ce48f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:30 -0400 Subject: NFSv4: Fix up delegated attributes in nfs_setattr nfs_setattr calls nfs_update_inode() directly, so we have to reset the m/ctime there. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6185e6e15820..b4914a11c3c2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -606,6 +606,28 @@ out_no_inode: } EXPORT_SYMBOL_GPL(nfs_fhget); +static void +nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr) +{ + unsigned long cache_validity = NFS_I(inode)->cache_validity; + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME | + NFS_ATTR_FATTR_CTIME); + + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME | + NFS_ATTR_FATTR_MTIME); + + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } +} + void nfs_update_delegated_atime(struct inode *inode) { spin_lock(&inode->i_lock); @@ -2164,6 +2186,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; + /* Fix up any delegated attributes in the struct nfs_fattr */ + nfs_fattr_fixup_delegated(inode, fattr); + save_cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME -- cgit v1.2.3 From 32215c1f893a11c5b0c6e1d70907cb40b54925ab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:31 -0400 Subject: NFSv4: Don't request atime/mtime/size if they are delegated to us If the timestamps and size are delegated to the client, then it is authoritative w.r.t. their values, so we should not be requesting those values from the server. Note that this allows us to optimise away most GETATTR calls if the only changes to the attributes are the result of read() or write(). Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 88edeaf5b5d5..cbd340cd825e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -310,6 +310,18 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, dst[1] &= ~FATTR4_WORD1_MODE; if (!(cache_validity & NFS_INO_INVALID_OTHER)) dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP); + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + dst[1] &= ~FATTR4_WORD1_TIME_ACCESS; + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + dst[1] &= ~FATTR4_WORD1_TIME_MODIFY; + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + dst[1] &= ~FATTR4_WORD1_TIME_METADATA; + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + dst[1] &= ~FATTR4_WORD1_TIME_ACCESS; + } } static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, @@ -3414,7 +3426,8 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, .inode = inode, .stateid = &arg.stateid, }; - unsigned long adjust_flags = NFS_INO_INVALID_CHANGE; + unsigned long adjust_flags = NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME; int err; if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID)) @@ -4978,8 +4991,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct goto out; nfs4_inode_make_writeable(inode); - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode, - NFS_INO_INVALID_CHANGE); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), + inode, + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start, -- cgit v1.2.3 From 707f13b3d081ea811c40014e7b61f2c487ee9a4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:32 -0400 Subject: NFSv4: Add support for the FATTR4_OPEN_ARGUMENTS attribute Query the server for the OPEN arguments that it supports so that we can figure out which extensions we can use. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 20 ++++++++++++++++++-- fs/nfs/nfs4xdr.c | 24 ++++++++++++++++++++++++ include/linux/nfs4.h | 2 ++ include/linux/nfs_xdr.h | 9 +++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cbd340cd825e..ae91492e9521 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3885,11 +3885,14 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_TIME_DELEG_MODIFY - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { - u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion; + u32 minorversion = server->nfs_client->cl_minorversion; + u32 bitmask[3] = { + [0] = FATTR4_WORD0_SUPPORTED_ATTRS, + }; struct nfs4_server_caps_arg args = { .fhandle = fhandle, .bitmask = bitmask, @@ -3915,6 +3918,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { + bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS | + FATTR4_WORD0_FH_EXPIRE_TYPE | + FATTR4_WORD0_LINK_SUPPORT | + FATTR4_WORD0_SYMLINK_SUPPORT | + FATTR4_WORD0_ACLSUPPORT | + FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING) & + res.attr_bitmask[0]; /* Sanity check the server answers */ switch (minorversion) { case 0: @@ -3923,9 +3934,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f break; case 1: res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT & + res.attr_bitmask[2]; break; case 2: res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS) & + res.attr_bitmask[2]; } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e160a275ad4a..98aab2c324c9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4337,6 +4337,28 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +static int decode_attr_open_arguments(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs4_open_caps *res) +{ + memset(res, 0, sizeof(*res)); + if (unlikely(bitmap[2] & (FATTR4_WORD2_OPEN_ARGUMENTS - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_OPEN_ARGUMENTS)) { + if (decode_bitmap4(xdr, res->oa_share_access, ARRAY_SIZE(res->oa_share_access)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_share_deny, ARRAY_SIZE(res->oa_share_deny)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_share_access_want, ARRAY_SIZE(res->oa_share_access_want)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_open_claim, ARRAY_SIZE(res->oa_open_claim)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_createmode, ARRAY_SIZE(res->oa_createmode)) < 0) + return -EIO; + bitmap[2] &= ~FATTR4_WORD2_OPEN_ARGUMENTS; + } + return 0; +} + static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen) { unsigned int attrwords = XDR_QUADLEN(attrlen); @@ -4511,6 +4533,8 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re if ((status = decode_attr_exclcreat_supported(xdr, bitmap, res->exclcreat_bitmask)) != 0) goto xdr_error; + if ((status = decode_attr_open_arguments(xdr, bitmap, &res->open_caps)) != 0) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: dprintk("%s: xdr returned %d!\n", __func__, -status); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c074e0ac390f..f9df88091c6d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -512,6 +512,7 @@ enum { enum { FATTR4_TIME_DELEG_ACCESS = 84, FATTR4_TIME_DELEG_MODIFY = 85, + FATTR4_OPEN_ARGUMENTS = 86, }; /* @@ -595,6 +596,7 @@ enum { #define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) #define FATTR4_WORD2_TIME_DELEG_ACCESS BIT(FATTR4_TIME_DELEG_ACCESS - 64) #define FATTR4_WORD2_TIME_DELEG_MODIFY BIT(FATTR4_TIME_DELEG_MODIFY - 64) +#define FATTR4_WORD2_OPEN_ARGUMENTS BIT(FATTR4_OPEN_ARGUMENTS - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d8cfa956d24c..af510a7ec46a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1213,6 +1213,14 @@ struct nfs4_statfs_res { struct nfs_fsstat *fsstat; }; +struct nfs4_open_caps { + u32 oa_share_access[1]; + u32 oa_share_deny[1]; + u32 oa_share_access_want[1]; + u32 oa_open_claim[1]; + u32 oa_createmode[1]; +}; + struct nfs4_server_caps_arg { struct nfs4_sequence_args seq_args; struct nfs_fh *fhandle; @@ -1229,6 +1237,7 @@ struct nfs4_server_caps_res { u32 fh_expire_type; u32 case_insensitive; u32 case_preserving; + struct nfs4_open_caps open_caps; }; #define NFS4_PATHNAME_MAXCOMPONENTS 512 -- cgit v1.2.3 From d2a00cceb93a4df2afaceb836297628d0aeec7ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:33 -0400 Subject: NFSv4: Detect support for OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION If the server supports the NFSv4.2 protocol extension to optimise away returning a stateid when it returns a delegation, then we cache that information in another capability flag. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 ++++ include/linux/nfs_fs_sb.h | 1 + include/uapi/linux/nfs4.h | 2 ++ 3 files changed, 7 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ae91492e9521..adf4fc8610f6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3990,6 +3990,10 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f sizeof(server->attr_bitmask)); server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (res.open_caps.oa_share_access_want[0] & + NFS4_SHARE_WANT_OPEN_XOR_DELEGATION) + server->caps |= NFS_CAP_OPEN_XOR; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5a76a87cd924..fe5b1a8bd723 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index afd7e32906c3..caf4db2fcbb9 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -46,6 +46,7 @@ #define NFS4_OPEN_RESULT_CONFIRM 0x0002 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 #define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008 +#define NFS4_OPEN_RESULT_NO_OPEN_STATEID 0x0010 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F @@ -70,6 +71,7 @@ #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 #define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 +#define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION 0x200000 #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 -- cgit v1.2.3 From 1f0a6b3fa7a9405b74516611afa005075a21e810 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:34 -0400 Subject: NFSv4: Add support for OPEN4_RESULT_NO_OPEN_STATEID If the server returns a delegation stateid only, then don't try to set an open stateid. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index adf4fc8610f6..5b18aac0b34a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2035,8 +2035,11 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) data->o_arg.claim, &data->o_res.delegation); - if (!update_open_stateid(state, &data->o_res.stateid, - NULL, data->o_arg.fmode)) + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) { + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); + } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) return ERR_PTR(-EAGAIN); refcount_inc(&state->count); @@ -2105,8 +2108,13 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) data->o_arg.claim, &data->o_res.delegation); - if (!update_open_stateid(state, &data->o_res.stateid, - NULL, data->o_arg.fmode)) { + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) { + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) { + nfs4_put_open_state(state); + state = ERR_PTR(-EAGAIN); + } + } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) { nfs4_put_open_state(state); state = ERR_PTR(-EAGAIN); } -- cgit v1.2.3 From d79ed371d51c57b2af74781466f1c0e821964b48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:35 -0400 Subject: NFSv4: Ask for a delegation or an open stateid in OPEN Turn on the optimisation to allow the client to request that the server not return the open stateid when it returns a delegation. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5b18aac0b34a..b1376571f6ef 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1358,6 +1358,8 @@ nfs4_map_atomic_open_share(struct nfs_server *server, /* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */ if (server->caps & NFS_CAP_DELEGTIME) res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS; + if (server->caps & NFS_CAP_OPEN_XOR) + res |= NFS4_SHARE_WANT_OPEN_XOR_DELEGATION; out: return res; } -- cgit v1.2.3 From adb4b42d19aea91826621a8d0bac94cf2c08f8bc Mon Sep 17 00:00:00 2001 From: Lance Shelton Date: Sun, 16 Jun 2024 21:21:36 -0400 Subject: Return the delegation when deleting sillyrenamed files Add a callback to return the delegation in order to allow generic NFS code to return the delegation when appropriate. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs3proc.c | 8 ++++++++ fs/nfs/nfs4proc.c | 1 + fs/nfs/proc.c | 8 ++++++++ fs/nfs/unlink.c | 2 ++ include/linux/nfs_xdr.h | 1 + 5 files changed, 20 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index cab6c73d25d6..1566163c6d85 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -984,6 +984,13 @@ static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags) return 0; } +static int nfs3_return_delegation(struct inode *inode) +{ + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); + return 0; +} + static const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, .atomic_open = nfs_atomic_open_v23, @@ -1062,6 +1069,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .clear_acl_cache = forget_all_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, + .return_delegation = nfs3_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b1376571f6ef..9376b5031acf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10871,6 +10871,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, + .return_delegation = nfs4_inode_return_delegation, .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 995cc42b0fa0..6c09cd090c34 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -692,6 +692,13 @@ static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags) return 0; } +static int nfs_return_delegation(struct inode *inode) +{ + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); + return 0; +} + static const struct inode_operations nfs_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -757,6 +764,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, + .return_delegation = nfs_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 0110299643a2..bf77399696a7 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -232,6 +232,8 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); + NFS_PROTO(inode)->return_delegation(inode); + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data)) nfs_free_unlinkdata(data); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index af510a7ec46a..01efacae4634 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1840,6 +1840,7 @@ struct nfs_rpc_ops { struct iattr *iattr, int *); int (*have_delegation)(struct inode *, fmode_t, int); + int (*return_delegation)(struct inode *); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); -- cgit v1.2.3 From cf453bfe92fd5a2a189305130c5e076b9871ec43 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:37 -0400 Subject: NFSv4: Don't send delegation-related share access modes to CLOSE When we set the new share access modes for CLOSE in nfs4_close_prepare(). we should only set a mode of NFS4_SHARE_ACCESS_READ, NFS4_SHARE_ACCESS_WRITE or NFS4_SHARE_ACCESS_BOTH. Currently, we may also be passing in the NFSv4.1 share modes for controlling delegation requests in OPEN, which is wrong. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9376b5031acf..26758acba3a6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1333,8 +1333,7 @@ static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx) } static u32 -nfs4_map_atomic_open_share(struct nfs_server *server, - fmode_t fmode, int openflags) +nfs4_fmode_to_share_access(fmode_t fmode) { u32 res = 0; @@ -1348,6 +1347,15 @@ nfs4_map_atomic_open_share(struct nfs_server *server, case FMODE_READ|FMODE_WRITE: res = NFS4_SHARE_ACCESS_BOTH; } + return res; +} + +static u32 +nfs4_map_atomic_open_share(struct nfs_server *server, + fmode_t fmode, int openflags) +{ + u32 res = nfs4_fmode_to_share_access(fmode); + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) goto out; /* Want no delegation if we're using O_DIRECT */ @@ -3753,8 +3761,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) } calldata->arg.share_access = - nfs4_map_atomic_open_share(NFS_SERVER(inode), - calldata->arg.fmode, 0); + nfs4_fmode_to_share_access(calldata->arg.fmode); if (calldata->res.fattr == NULL) calldata->arg.bitmask = NULL; -- cgit v1.2.3 From b8ec59cbba383cced18bafb316ed426ec68fbe6b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:45 -0400 Subject: NFSv4/pnfs: Remove redundant list check pnfs_layout_free_bulk_destroy_list() already checks for whether the list is empty or not. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b5834728f31b..bbbb692b2a47 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -923,8 +923,6 @@ restart: rcu_read_unlock(); spin_unlock(&clp->cl_lock); - if (list_empty(&layout_list)) - return 0; return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); } @@ -947,8 +945,6 @@ restart: rcu_read_unlock(); spin_unlock(&clp->cl_lock); - if (list_empty(&layout_list)) - return 0; return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); } -- cgit v1.2.3 From 924cf3c91fe29c7ebd1b9d56e10f513c1bd7d4bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:46 -0400 Subject: NFSv4.1: constify the stateid argument in nfs41_test_stateid() Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 3 ++- fs/nfs/nfs4proc.c | 24 ++++++++++++------------ fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 2 +- 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7024230f0d1d..c2045a2a9d0f 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -67,7 +67,8 @@ struct nfs4_minor_version_ops { void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); int (*test_and_free_expired)(struct nfs_server *, - nfs4_stateid *, const struct cred *); + const nfs4_stateid *, + const struct cred *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); void (*session_trunk)(struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26758acba3a6..db585a6a8f0d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, const struct cred *cred, struct nfs4_slot *slot, bool is_privileged); -static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, - const struct cred *); +static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *, + const struct cred *); static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, - const struct cred *, bool); + const struct cred *, bool); #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL @@ -2875,16 +2875,16 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st } static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { return -NFS4ERR_BAD_STATEID; } #if defined(CONFIG_NFS_V4_1) static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; @@ -10386,12 +10386,12 @@ out: } static int _nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; struct nfs41_test_stateid_args args = { - .stateid = stateid, + .stateid = *stateid, }; struct nfs41_test_stateid_res res; struct rpc_message msg = { @@ -10447,8 +10447,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server, * failed or the state ID is not currently valid. */ static int nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { struct nfs4_exception exception = { .interruptible = true, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 98aab2c324c9..4bf7d5c09282 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2137,7 +2137,7 @@ static void encode_test_stateid(struct xdr_stream *xdr, { encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr); encode_uint32(xdr, 1); - encode_nfs4_stateid(xdr, args->stateid); + encode_nfs4_stateid(xdr, &args->stateid); } static void encode_free_stateid(struct xdr_stream *xdr, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 01efacae4634..45623af3e7b8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1438,7 +1438,7 @@ struct nfs41_secinfo_no_name_args { struct nfs41_test_stateid_args { struct nfs4_sequence_args seq_args; - nfs4_stateid *stateid; + nfs4_stateid stateid; }; struct nfs41_test_stateid_res { -- cgit v1.2.3 From 5d2db0898a569984c37c7afcfbb902fc5ff2817d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:47 -0400 Subject: NFSv4: Clean up encode_nfs4_stateid() Ensure that we encode the actual stateid, and not any metadata. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4bf7d5c09282..7704a4509676 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1067,9 +1067,10 @@ static void encode_nops(struct compound_hdr *hdr) *hdr->nops_p = htonl(hdr->nops); } -static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid) +static void encode_nfs4_stateid(struct xdr_stream *xdr, + const nfs4_stateid *stateid) { - encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); + encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); } static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) -- cgit v1.2.3 From 8adc8302109f868a7f17f29b8a336cea8deaa96e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:48 -0400 Subject: pNFS: Add a flag argument to pnfs_destroy_layouts_byclid() Change the bool argument to a flag so that we can add different modes for doing bulk destroy of a layout. In particular, we will want the ability to schedule return of all the layouts associated with a given NFS server when it reboots. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/callback_proc.c | 5 +++-- fs/nfs/pnfs.c | 21 +++++++++------------ fs/nfs/pnfs.h | 14 +++++++++----- 3 files changed, 21 insertions(+), 19 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 199c52788640..7832fb0369a1 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -323,9 +323,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, int stat; if (args->cbl_recall_type == RETURN_FSID) - stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true); + stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid, + PNFS_LAYOUT_BULK_RETURN); else - stat = pnfs_destroy_layouts_byclid(clp, true); + stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN); if (stat != 0) return NFS4ERR_DELAY; return NFS4ERR_NOMATCHING_LAYOUT; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bbbb692b2a47..0e188bc303ee 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -868,7 +868,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, static int pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, - bool is_bulk_recall) + enum pnfs_layout_destroy_mode mode) { struct pnfs_layout_hdr *lo; struct inode *inode; @@ -887,7 +887,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, spin_lock(&inode->i_lock); list_del_init(&lo->plh_bulk_destroy); if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { - if (is_bulk_recall) + if (mode == PNFS_LAYOUT_BULK_RETURN) set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); ret = -EAGAIN; } @@ -901,10 +901,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, return ret; } -int -pnfs_destroy_layouts_byfsid(struct nfs_client *clp, - struct nfs_fsid *fsid, - bool is_recall) +int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, + enum pnfs_layout_destroy_mode mode) { struct nfs_server *server; LIST_HEAD(layout_list); @@ -923,12 +921,11 @@ restart: rcu_read_unlock(); spin_unlock(&clp->cl_lock); - return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); + return pnfs_layout_free_bulk_destroy_list(&layout_list, mode); } -int -pnfs_destroy_layouts_byclid(struct nfs_client *clp, - bool is_recall) +int pnfs_layout_destroy_byclid(struct nfs_client *clp, + enum pnfs_layout_destroy_mode mode) { struct nfs_server *server; LIST_HEAD(layout_list); @@ -945,7 +942,7 @@ restart: rcu_read_unlock(); spin_unlock(&clp->cl_lock); - return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); + return pnfs_layout_free_bulk_destroy_list(&layout_list, mode); } /* @@ -958,7 +955,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) nfs4_deviceid_mark_client_invalid(clp); nfs4_deviceid_purge_client(clp); - pnfs_destroy_layouts_byclid(clp, false); + pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE); } static void diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index fa5beeaaf5da..a6f9427782c2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -118,6 +118,11 @@ enum layoutdriver_policy_flags { PNFS_LAYOUTGET_ON_OPEN = 1 << 3, }; +enum pnfs_layout_destroy_mode { + PNFS_LAYOUT_INVALIDATE = 0, + PNFS_LAYOUT_BULK_RETURN, +}; + struct nfs4_deviceid_node; /* Per-layout driver specific registration structure */ @@ -273,11 +278,10 @@ void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_layout_final(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); -int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, - struct nfs_fsid *fsid, - bool is_recall); -int pnfs_destroy_layouts_byclid(struct nfs_client *clp, - bool is_recall); +int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, + enum pnfs_layout_destroy_mode mode); +int pnfs_layout_destroy_byclid(struct nfs_client *clp, + enum pnfs_layout_destroy_mode mode); bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst, struct pnfs_layout_range *dst_range, struct inode *inode); -- cgit v1.2.3 From 41d0a8ead97209c0e3e1f64e6484c31a98e31154 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:49 -0400 Subject: NFSv4/pnfs: Add support for the PNFS_LAYOUT_FILE_BULK_RETURN flag Add a flag PNFS_LAYOUT_FILE_BULK_RETURN, that will attempt to return all the layouts in a pnfs_layout_destroy_byfsid/pnfs_layout_destroy_byclid call, instead of just invalidating them. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 35 +++++++++++++++++++---------------- fs/nfs/pnfs.h | 1 + 2 files changed, 20 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0e188bc303ee..3bfc74841831 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -476,6 +476,18 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, return !list_empty(&lo->plh_segs); } +static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo, + struct list_head *lseg_list, + enum pnfs_iomode iomode, u32 seq) +{ + struct pnfs_layout_range range = { + .iomode = iomode, + .length = NFS4_MAX_UINT64, + }; + + return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq); +} + static int pnfs_iomode_to_fail_bit(u32 iomode) { @@ -886,7 +898,10 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, spin_lock(&inode->i_lock); list_del_init(&lo->plh_bulk_destroy); - if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { + if (mode == PNFS_LAYOUT_FILE_BULK_RETURN) { + pnfs_mark_layout_stateid_return(lo, &lseg_list, + IOMODE_ANY, 0); + } else if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { if (mode == PNFS_LAYOUT_BULK_RETURN) set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); ret = -EAGAIN; @@ -1265,27 +1280,15 @@ out: return status; } -static bool -pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo, - enum pnfs_iomode iomode, - u32 seq) -{ - struct pnfs_layout_range recall_range = { - .length = NFS4_MAX_UINT64, - .iomode = iomode, - }; - return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, - &recall_range, seq) != -EBUSY; -} - /* Return true if layoutreturn is needed */ static bool pnfs_layout_need_return(struct pnfs_layout_hdr *lo) { if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) return false; - return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode, - lo->plh_return_seq); + return pnfs_mark_layout_stateid_return(lo, &lo->plh_return_segs, + lo->plh_return_iomode, + lo->plh_return_seq) != EBUSY; } static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a6f9427782c2..8fa0f152ed19 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -121,6 +121,7 @@ enum layoutdriver_policy_flags { enum pnfs_layout_destroy_mode { PNFS_LAYOUT_INVALIDATE = 0, PNFS_LAYOUT_BULK_RETURN, + PNFS_LAYOUT_FILE_BULK_RETURN, }; struct nfs4_deviceid_node; -- cgit v1.2.3 From 6e7be9e7b71e38d5965f0a52323316e4e8202b40 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:50 -0400 Subject: NFSv4/pNFS: Add a helper to defer failed layoutreturn calls If the layoutreturn-on-close fails due to an RPC layer problem, such as a timeout, then we want to retry at a later time. Add a helper function to allow this. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3bfc74841831..a79ae47b3842 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1171,6 +1171,26 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } +static void +pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range) +{ + const struct pnfs_layout_segment *lseg; + u32 seq = be32_to_cpu(arg_stateid->seqid); + + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) { + list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) { + if (pnfs_seqid_is_newer(lseg->pls_seq, seq) || + !pnfs_should_free_range(&lseg->pls_range, range)) + continue; + pnfs_set_plh_return_info(lo, range->iomode, seq); + break; + } + } +} + void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, @@ -1577,9 +1597,8 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, switch (ret) { case -NFS4ERR_NOMATCHING_LAYOUT: spin_lock(&inode->i_lock); - if (pnfs_layout_is_valid(lo) && - nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) - pnfs_set_plh_return_info(lo, args->range.iomode, 0); + pnfs_layoutreturn_retry_later_locked(lo, &args->stateid, + &args->range); pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&inode->i_lock); break; -- cgit v1.2.3 From 50379c9f095115147f6c9666c0bbb71ee1cd4796 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:51 -0400 Subject: NFSv4/pNFS: Handle server reboots in pnfs_poc_release() If the server reboots, then handle it by deferring the layout return. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a79ae47b3842..c8b1be1810e2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1586,8 +1586,7 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, } void pnfs_roc_release(struct nfs4_layoutreturn_args *args, - struct nfs4_layoutreturn_res *res, - int ret) + struct nfs4_layoutreturn_res *res, int ret) { struct pnfs_layout_hdr *lo = args->layout; struct inode *inode = args->inode; @@ -1595,6 +1594,9 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct nfs4_xdr_opaque_data *ld_private = args->ld_private; switch (ret) { + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_NOMATCHING_LAYOUT: spin_lock(&inode->i_lock); pnfs_layoutreturn_retry_later_locked(lo, &args->stateid, -- cgit v1.2.3 From bbbff6d5edd1079b9984da927d9cd4d49095fca9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:52 -0400 Subject: NFSv4/pNFS: Retry the layout return later in case of a timeout or reboot If the layout return failed due to a timeout or reboot, then leave the layout segments on the list so that the layout return gets replayed later. The exception would be if we're freeing the inode. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 21 ++++++++++++++++++++- fs/nfs/pnfs.c | 12 ++++++++++++ fs/nfs/pnfs.h | 3 +++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index db585a6a8f0d..e5103fa9d410 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9973,6 +9973,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) if (!nfs41_sequence_process(task, &lrp->res.seq_res)) return; + if (task->tk_rpc_status == -ETIMEDOUT) { + lrp->rpc_status = -EAGAIN; + lrp->res.lrs_present = 0; + return; + } /* * Was there an RPC level error? Assume the call succeeded, * and that we need to release the layout @@ -9995,6 +10000,15 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) fallthrough; case 0: break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + nfs4_schedule_session_recovery(server->nfs_client->cl_session, + task->tk_status); + lrp->res.lrs_present = 0; + lrp->rpc_status = -EAGAIN; + task->tk_status = 0; + break; case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) break; @@ -10012,8 +10026,13 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; - pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range, + if (lrp->rpc_status == 0 || !lrp->inode) + pnfs_layoutreturn_free_lsegs( + lo, &lrp->args.stateid, &lrp->args.range, lrp->res.lrs_present ? &lrp->res.stateid : NULL); + else + pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid, + &lrp->args.range); nfs4_sequence_free_slot(&lrp->res.seq_res); if (lrp->ld_private.ops && lrp->ld_private.ops->free) lrp->ld_private.ops->free(&lrp->ld_private); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c8b1be1810e2..04a52fa3d28c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1191,6 +1191,18 @@ pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo, } } +void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range) +{ + struct inode *inode = lo->plh_inode; + + spin_lock(&inode->i_lock); + pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range); + pnfs_clear_layoutreturn_waitbit(lo); + spin_unlock(&inode->i_lock); +} + void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8fa0f152ed19..cd23a38eac75 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -328,6 +328,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, enum pnfs_iomode iomode, bool strict_iomode, gfp_t gfp_flags); +void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range); void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, -- cgit v1.2.3 From 42375c2bfa3f7673f438e6dbf9acf17f69a98f77 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:53 -0400 Subject: NFSv4/pnfs: Give nfs4_proc_layoutreturn() a flags argument Replace the boolean in nfs4_proc_layoutreturn() with a set of flags that will allow us to craft a version that is appropriate for reboot recovery. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 8 +++++--- fs/nfs/pnfs.c | 18 +++++++++++------- fs/nfs/pnfs.h | 6 +++++- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e5103fa9d410..8e89b5e0ce31 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10048,7 +10048,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { .rpc_release = nfs4_layoutreturn_release, }; -int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags) { struct rpc_task *task; struct rpc_message msg = { @@ -10071,7 +10071,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) &task_setup_data.rpc_client, &msg); lrp->inode = nfs_igrab_and_active(lrp->args.inode); - if (!sync) { + if (flags & PNFS_FL_LAYOUTRETURN_ASYNC) { if (!lrp->inode) { nfs4_layoutreturn_release(lrp); return -EAGAIN; @@ -10079,6 +10079,8 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) task_setup_data.flags |= RPC_TASK_ASYNC; } if (!lrp->inode) + flags |= PNFS_FL_LAYOUTRETURN_PRIVILEGED; + if (flags & PNFS_FL_LAYOUTRETURN_PRIVILEGED) nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 1); else @@ -10087,7 +10089,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - if (sync) + if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC)) status = task->tk_status; trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status); dprintk("<-- %s status=%d\n", __func__, status); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 04a52fa3d28c..c482088cb485 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1279,7 +1279,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, const struct cred **pcred, enum pnfs_iomode iomode, - bool sync) + unsigned int flags) { struct inode *ino = lo->plh_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; @@ -1306,7 +1306,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, if (ld->prepare_layoutreturn) ld->prepare_layoutreturn(&lrp->args); - status = nfs4_proc_layoutreturn(lrp, sync); + status = nfs4_proc_layoutreturn(lrp, flags); out: dprintk("<-- %s status: %d\n", __func__, status); return status; @@ -1340,7 +1340,8 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) spin_unlock(&inode->i_lock); if (send) { /* Send an async layoutreturn so we dont deadlock */ - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); } } else spin_unlock(&inode->i_lock); @@ -1407,7 +1408,8 @@ _pnfs_return_layout(struct inode *ino) send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL); spin_unlock(&ino->i_lock); if (send) - status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true); + status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, + 0); out_wait_layoutreturn: wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); out_put_layout_hdr: @@ -1548,7 +1550,7 @@ out_noroc: return true; } if (layoutreturn) - pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true); + pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, 0); pnfs_put_layout_hdr(lo); return false; } @@ -2595,7 +2597,8 @@ pnfs_mark_layout_for_return(struct inode *inode, return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode); spin_unlock(&inode->i_lock); if (return_now) - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); } else { spin_unlock(&inode->i_lock); nfs_commit_inode(inode, 0); @@ -2711,7 +2714,8 @@ restart: } spin_unlock(&inode->i_lock); rcu_read_unlock(); - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); pnfs_put_layout_hdr(lo); cond_resched(); goto restart; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index cd23a38eac75..d192feb346b4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -248,6 +248,9 @@ extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); /* nfs4proc.c */ +#define PNFS_FL_LAYOUTRETURN_ASYNC (1U << 0) +#define PNFS_FL_LAYOUTRETURN_PRIVILEGED (1U << 1) + extern size_t max_response_pages(struct nfs_server *server); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, @@ -255,7 +258,8 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, extern struct pnfs_layout_segment * nfs4_proc_layoutget(struct nfs4_layoutget *lgp, struct nfs4_exception *exception); -extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, + unsigned int flags); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); -- cgit v1.2.3 From ad3c436dace5538c8d4b424958a723bc23b901dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:54 -0400 Subject: NFSv4/pNFS: Remove redundant call to unhash the layout The layout will be automatically unhashed on final release of the reference count. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c482088cb485..31df5fae7acb 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -858,8 +858,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, break; inode = pnfs_grab_inode_layout_hdr(lo); if (inode != NULL) { - if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) - list_del_rcu(&lo->plh_layouts); if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) continue; -- cgit v1.2.3 From 5468fc8298a97ca504aca8ac0c7b9e6fd7c1b588 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:55 -0400 Subject: NFSv4/pNFS: Do layout state recovery upon reboot Some pNFS implementations, such as flexible files, want the client to send the layout stats and layout errors that may have incurred while the metadata server was booting. To do so, the client sends a layoutreturn with an all-zero stateid while the server is in grace during reboot recovery. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- fs/nfs/nfs4state.c | 4 +- fs/nfs/pnfs.c | 106 ++++++++++++++++++++++++++++++--- fs/nfs/pnfs.h | 6 ++ include/linux/nfs_fs_sb.h | 1 + 5 files changed, 110 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 24188af56d5b..39ba9f4208aa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2548,7 +2548,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *dummy) { #if IS_ENABLED(CONFIG_NFS_V4_2) - server->caps |= NFS_CAP_LAYOUTSTATS; + server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN; #endif return 0; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5b452411e8fd..877f682b45f2 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1863,6 +1863,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) if (!nfs4_state_clear_reclaim_reboot(clp)) return; + pnfs_destroy_all_layouts(clp); ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); err = nfs4_reclaim_complete(clp, ops, cred); @@ -2068,7 +2069,6 @@ static int nfs4_establish_lease(struct nfs_client *clp) put_cred(cred); if (status != 0) return status; - pnfs_destroy_all_layouts(clp); return 0; } @@ -2680,6 +2680,8 @@ static void nfs4_state_manager(struct nfs_client *clp) section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); + if (status == 0) + status = pnfs_layout_handle_reboot(clp); if (status == -EAGAIN) continue; if (status < 0) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 31df5fae7acb..aa698481bec8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, u32 seq); static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list); +static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -937,25 +938,37 @@ restart: return pnfs_layout_free_bulk_destroy_list(&layout_list, mode); } -int pnfs_layout_destroy_byclid(struct nfs_client *clp, - enum pnfs_layout_destroy_mode mode) +static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp, + struct list_head *list) { struct nfs_server *server; - LIST_HEAD(layout_list); spin_lock(&clp->cl_lock); rcu_read_lock(); restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - if (pnfs_layout_bulk_destroy_byserver_locked(clp, - server, - &layout_list) != 0) + if (pnfs_layout_bulk_destroy_byserver_locked(clp, server, + list) != 0) goto restart; } rcu_read_unlock(); spin_unlock(&clp->cl_lock); +} - return pnfs_layout_free_bulk_destroy_list(&layout_list, mode); +static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp, + struct list_head *list, + enum pnfs_layout_destroy_mode mode) +{ + pnfs_layout_build_destroy_list_byclient(clp, list); + return pnfs_layout_free_bulk_destroy_list(list, mode); +} + +int pnfs_layout_destroy_byclid(struct nfs_client *clp, + enum pnfs_layout_destroy_mode mode) +{ + LIST_HEAD(layout_list); + + return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode); } /* @@ -971,6 +984,67 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE); } +static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp, + struct list_head *list) +{ + struct nfs_server *server; + + spin_lock(&clp->cl_lock); + rcu_read_lock(); +restart: + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN)) + continue; + if (pnfs_layout_bulk_destroy_byserver_locked(clp, server, + list) != 0) + goto restart; + } + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); +} + +static int pnfs_layout_bulk_list_reboot(struct list_head *list) +{ + struct pnfs_layout_hdr *lo; + struct nfs_server *server; + int ret; + + list_for_each_entry(lo, list, plh_bulk_destroy) { + server = NFS_SERVER(lo->plh_inode); + ret = pnfs_layout_return_on_reboot(lo); + switch (ret) { + case 0: + continue; + case -NFS4ERR_BAD_STATEID: + server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN; + break; + case -NFS4ERR_NO_GRACE: + break; + default: + goto err; + } + break; + } + return 0; +err: + return ret; +} + +int pnfs_layout_handle_reboot(struct nfs_client *clp) +{ + LIST_HEAD(list); + int ret = 0, ret2; + + pnfs_layout_build_recover_list_byclient(clp, &list); + if (!list_empty(&list)) + ret = pnfs_layout_bulk_list_reboot(&list); + ret2 = pnfs_layout_do_destroy_byclid(clp, &list, + PNFS_LAYOUT_INVALIDATE); + if (!ret) + ret = ret2; + return (ret == 0) ? 0 : -EAGAIN; +} + static void pnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred) { @@ -1445,6 +1519,24 @@ pnfs_commit_and_return_layout(struct inode *inode) return ret; } +static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo) +{ + struct inode *inode = lo->plh_inode; + const struct cred *cred; + + spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo)) { + spin_unlock(&inode->i_lock); + return 0; + } + cred = get_cred(lo->plh_lc_cred); + pnfs_get_layout_hdr(lo); + spin_unlock(&inode->i_lock); + + return pnfs_send_layoutreturn(lo, &zero_stateid, &cred, IOMODE_ANY, + PNFS_FL_LAYOUTRETURN_PRIVILEGED); +} + bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d192feb346b4..bb5142b4e67a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -356,6 +356,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); void pnfs_layout_return_unused_byclid(struct nfs_client *clp, enum pnfs_iomode iomode); +int pnfs_layout_handle_reboot(struct nfs_client *clp); /* nfs4_deviceid_flags */ enum { @@ -737,6 +738,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) { } +static inline int pnfs_layout_handle_reboot(struct nfs_client *clp) +{ + return 0; +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fe5b1a8bd723..ba9df1848b35 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) -- cgit v1.2.3 From d869da91cccb90320e101a2758f1e2b3803ade5c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jun 2024 16:02:06 -0400 Subject: nfs/blocklayout: Fix premature PR key unregistration During generic/069 runs with pNFS SCSI layouts, the NFS client emits the following in the system journal: kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2) kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3) kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2) kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3) kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: [sdb] tag#16 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#16 CDB: Write(10) 2a 00 00 00 00 50 00 00 08 00 kernel: reservation conflict error, dev sdb, sector 80 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 2 kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: reservation conflict kernel: sd 6:0:0:1: [sdb] tag#18 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#17 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s kernel: sd 6:0:0:1: [sdb] tag#18 CDB: Write(10) 2a 00 00 00 00 60 00 00 08 00 kernel: sd 6:0:0:1: [sdb] tag#17 CDB: Write(10) 2a 00 00 00 00 58 00 00 08 00 kernel: reservation conflict error, dev sdb, sector 96 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0 kernel: reservation conflict error, dev sdb, sector 88 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0 systemd[1]: fstests-generic-069.scope: Deactivated successfully. systemd[1]: fstests-generic-069.scope: Consumed 5.092s CPU time. systemd[1]: media-test.mount: Deactivated successfully. systemd[1]: media-scratch.mount: Deactivated successfully. kernel: sd 6:0:0:1: reservation conflict kernel: failed to unregister PR key. This appears to be due to a race. bl_alloc_lseg() calls this: 561 static struct nfs4_deviceid_node * 562 bl_find_get_deviceid(struct nfs_server *server, 563 const struct nfs4_deviceid *id, const struct cred *cred, 564 gfp_t gfp_mask) 565 { 566 struct nfs4_deviceid_node *node; 567 unsigned long start, end; 568 569 retry: 570 node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); 571 if (!node) 572 return ERR_PTR(-ENODEV); nfs4_find_get_deviceid() does a lookup without the spin lock first. If it can't find a matching deviceid, it creates a new device_info (which calls bl_alloc_deviceid_node, and that registers the device's PR key). Then it takes the nfs4_deviceid_lock and looks up the deviceid again. If it finds it this time, bl_find_get_deviceid() frees the spare (new) device_info, which unregisters the PR key for the same device. Any subsequent I/O from this client on that device gets EBADE. The umount later unregisters the device's PR key again. To prevent this problem, register the PR key after the deviceid_node lookup. Signed-off-by: Christoph Hellwig Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/blocklayout.c | 25 +++++++---- fs/nfs/blocklayout/blocklayout.h | 9 +++- fs/nfs/blocklayout/dev.c | 91 ++++++++++++++++++++++++++++++---------- 3 files changed, 94 insertions(+), 31 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 6be13e0ec170..0becdec12970 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -564,25 +564,32 @@ bl_find_get_deviceid(struct nfs_server *server, gfp_t gfp_mask) { struct nfs4_deviceid_node *node; - unsigned long start, end; + int err = -ENODEV; retry: node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); if (!node) return ERR_PTR(-ENODEV); - if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) - return node; + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { + unsigned long end = jiffies; + unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT; - end = jiffies; - start = end - PNFS_DEVICE_RETRY_TIMEOUT; - if (!time_in_range(node->timestamp_unavailable, start, end)) { - nfs4_delete_deviceid(node->ld, node->nfs_client, id); - goto retry; + if (!time_in_range(node->timestamp_unavailable, start, end)) { + nfs4_delete_deviceid(node->ld, node->nfs_client, id); + goto retry; + } + goto out_put; } + if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) + goto out_put; + + return node; + +out_put: nfs4_put_deviceid_node(node); - return ERR_PTR(-ENODEV); + return ERR_PTR(err); } static int diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index f1eeb4914199..6da40ca19570 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -104,20 +104,26 @@ struct pnfs_block_dev { u64 start; u64 len; + enum pnfs_block_volume_type type; u32 nr_children; struct pnfs_block_dev *children; u64 chunk_size; struct file *bdev_file; u64 disk_offset; + unsigned long flags; u64 pr_key; - bool pr_registered; bool (*map)(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev_map *map); }; +/* pnfs_block_dev flag bits */ +enum { + PNFS_BDEV_REGISTERED = 0, +}; + /* sector_t fields are all in 512-byte sectors */ struct pnfs_block_extent { union { @@ -172,6 +178,7 @@ struct bl_msg_hdr { #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ /* dev.c */ +bool bl_register_dev(struct pnfs_block_dev *d); struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, gfp_t gfp_mask); void bl_free_deviceid_node(struct nfs4_deviceid_node *d); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 93ef7f864980..1785162d4a47 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -13,9 +13,75 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD +static void bl_unregister_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + + if (!test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + return; + + if (ops->pr_register(bdev, dev->pr_key, 0, false)) + pr_err("failed to unregister PR key.\n"); +} + +static bool bl_register_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; + + if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + return true; + + status = ops->pr_register(bdev, 0, dev->pr_key, true); + if (status) { + pr_err("pNFS: failed to register key for block device %s.", + bdev->bd_disk->disk_name); + return false; + } + return true; +} + +static void bl_unregister_dev(struct pnfs_block_dev *dev) +{ + u32 i; + + if (dev->nr_children) { + for (i = 0; i < dev->nr_children; i++) + bl_unregister_dev(&dev->children[i]); + return; + } + + if (dev->type == PNFS_BLOCK_VOLUME_SCSI) + bl_unregister_scsi(dev); +} + +bool bl_register_dev(struct pnfs_block_dev *dev) +{ + u32 i; + + if (dev->nr_children) { + for (i = 0; i < dev->nr_children; i++) { + if (!bl_register_dev(&dev->children[i])) { + while (i > 0) + bl_unregister_dev(&dev->children[--i]); + return false; + } + } + return true; + } + + if (dev->type == PNFS_BLOCK_VOLUME_SCSI) + return bl_register_scsi(dev); + return true; +} + static void bl_free_device(struct pnfs_block_dev *dev) { + bl_unregister_dev(dev); + if (dev->nr_children) { int i; @@ -23,17 +89,6 @@ bl_free_device(struct pnfs_block_dev *dev) bl_free_device(&dev->children[i]); kfree(dev->children); } else { - if (dev->pr_registered) { - const struct pr_ops *ops = - file_bdev(dev->bdev_file)->bd_disk->fops->pr_ops; - int error; - - error = ops->pr_register(file_bdev(dev->bdev_file), - dev->pr_key, 0, false); - if (error) - pr_err("failed to unregister PR key.\n"); - } - if (dev->bdev_file) fput(dev->bdev_file); } @@ -365,14 +420,6 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, goto out_blkdev_put; } - error = ops->pr_register(file_bdev(d->bdev_file), 0, d->pr_key, true); - if (error) { - pr_err("pNFS: failed to register key for block device %s.", - file_bdev(d->bdev_file)->bd_disk->disk_name); - goto out_blkdev_put; - } - - d->pr_registered = true; return 0; out_blkdev_put: @@ -458,7 +505,9 @@ static int bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) { - switch (volumes[idx].type) { + d->type = volumes[idx].type; + + switch (d->type) { case PNFS_BLOCK_VOLUME_SIMPLE: return bl_parse_simple(server, d, volumes, idx, gfp_mask); case PNFS_BLOCK_VOLUME_SLICE: @@ -470,7 +519,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, case PNFS_BLOCK_VOLUME_SCSI: return bl_parse_scsi(server, d, volumes, idx, gfp_mask); default: - dprintk("unsupported volume type: %d\n", volumes[idx].type); + dprintk("unsupported volume type: %d\n", d->type); return -EIO; } } -- cgit v1.2.3 From 450b4b3b2f742d5b3fce51e8f808ff1ed2fe9f89 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jun 2024 16:02:07 -0400 Subject: nfs/blocklayout: Report only when /no/ device is found Since commit f931d8374cad ("nfs/blocklayout: refactor block device opening"), an error is reported when no multi-path device is found. But this isn't a fatal error if the subsequent device open is successful. On systems without multi-path devices, this message always appears whether there is a problem or not. Instead, generate less system journal noise by reporting an error only when both open attempts fail. The new error message is more actionable since it indicates that there is a real configuration issue to be addressed. Reviewed-by: Christoph Hellwig Reviewed-by: Benjamin Coddington Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 1785162d4a47..e2b9e8d1ac4d 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -369,7 +369,7 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix) bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL); if (IS_ERR(bdev_file)) { - pr_warn("pNFS: failed to open device %s (%ld)\n", + dprintk("failed to open device %s (%ld)\n", devname, PTR_ERR(bdev_file)); } @@ -398,8 +398,11 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, bdev_file = bl_open_path(v, "dm-uuid-mpath-0x"); if (IS_ERR(bdev_file)) bdev_file = bl_open_path(v, "wwn-0x"); - if (IS_ERR(bdev_file)) + if (IS_ERR(bdev_file)) { + pr_warn("pNFS: no device found for volume %*phN\n", + v->scsi.designator_len, v->scsi.designator); return PTR_ERR(bdev_file); + } d->bdev_file = bdev_file; d->len = bdev_nr_bytes(file_bdev(d->bdev_file)); -- cgit v1.2.3 From 7d09d6bb66709f33e85b9fc78b25b0055f4b9d7c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 25 Jun 2024 16:02:08 -0400 Subject: nfs/blocklayout: SCSI layout trace points for reservation key reg/unreg An administrator cannot take action on these messages, but the reported errors might be helpful for troubleshooting. Transition them to trace points so these events appear in the trace log and can be easily lined up with other traced NFS client operations. Examples: append_writer-6147 [000] 80.247393: bl_pr_key_reg: dev=8,0 (sda) key=0x6675bfcf59112e98 append_writer-6147 [000] 80.247842: bl_pr_key_unreg: dev=8,0 (sda) key=0x6675bfcf59112e98 umount.nfs4-6172 [002] 84.950409: bl_pr_key_unreg_err: dev=8,0 (sda) key=0x6675bfcf59112e98 status=RESERVATION_CONFLICT Reviewed-by: Benjamin Coddington Reviewed-by: Christoph Hellwig Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/dev.c | 26 ++++++++------ fs/nfs/nfs4trace.c | 7 ++++ fs/nfs/nfs4trace.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index e2b9e8d1ac4d..221781fb9cf1 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -10,6 +10,7 @@ #include #include "blocklayout.h" +#include "../nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -17,12 +18,16 @@ static void bl_unregister_scsi(struct pnfs_block_dev *dev) { struct block_device *bdev = file_bdev(dev->bdev_file); const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; if (!test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) return; - if (ops->pr_register(bdev, dev->pr_key, 0, false)) - pr_err("failed to unregister PR key.\n"); + status = ops->pr_register(bdev, dev->pr_key, 0, false); + if (status) + trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status); + else + trace_bl_pr_key_unreg(bdev, dev->pr_key); } static bool bl_register_scsi(struct pnfs_block_dev *dev) @@ -36,10 +41,10 @@ static bool bl_register_scsi(struct pnfs_block_dev *dev) status = ops->pr_register(bdev, 0, dev->pr_key, true); if (status) { - pr_err("pNFS: failed to register key for block device %s.", - bdev->bd_disk->disk_name); + trace_bl_pr_key_reg_err(bdev, dev->pr_key, status); return false; } + trace_bl_pr_key_reg(bdev, dev->pr_key); return true; } @@ -382,8 +387,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) { struct pnfs_block_volume *v = &volumes[idx]; - struct file *bdev_file; + struct block_device *bdev; const struct pr_ops *ops; + struct file *bdev_file; int error; if (!bl_validate_designator(v)) @@ -404,21 +410,19 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, return PTR_ERR(bdev_file); } d->bdev_file = bdev_file; + bdev = file_bdev(bdev_file); - d->len = bdev_nr_bytes(file_bdev(d->bdev_file)); + d->len = bdev_nr_bytes(bdev); d->map = bl_map_simple; d->pr_key = v->scsi.pr_key; if (d->len == 0) return -ENODEV; - pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", - file_bdev(d->bdev_file)->bd_disk->disk_name, d->pr_key); - - ops = file_bdev(d->bdev_file)->bd_disk->fops->pr_ops; + ops = bdev->bd_disk->fops->pr_ops; if (!ops) { pr_err("pNFS: block device %s does not support reservations.", - file_bdev(d->bdev_file)->bd_disk->disk_name); + bdev->bd_disk->disk_name); error = -EINVAL; goto out_blkdev_put; } diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index d22c6670f770..389941ccc9c9 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -2,6 +2,8 @@ /* * Copyright (c) 2013 Trond Myklebust */ +#include +#include #include #include "nfs4_fs.h" #include "internal.h" @@ -29,5 +31,10 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg_err); + EXPORT_TRACEPOINT_SYMBOL_GPL(fl_getdevinfo); #endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 4de8780a7c48..22c973316f0b 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2153,6 +2153,94 @@ TRACE_EVENT(ff_layout_commit_error, ) ); +DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class, + TP_PROTO( + const struct block_device *bdev, + u64 key + ), + TP_ARGS(bdev, key), + TP_STRUCT__entry( + __field(u64, key) + __field(dev_t, dev) + __string(device, bdev->bd_disk->disk_name) + ), + TP_fast_assign( + __entry->key = key; + __entry->dev = bdev->bd_dev; + __assign_str(device); + ), + TP_printk("dev=%d,%d (%s) key=0x%016llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __get_str(device), __entry->key + ) +); + +#define DEFINE_NFS4_BLOCK_PRKEY_EVENT(name) \ + DEFINE_EVENT(pnfs_bl_pr_key_class, name, \ + TP_PROTO( \ + const struct block_device *bdev, \ + u64 key \ + ), \ + TP_ARGS(bdev, key)) +DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_reg); +DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_unreg); + +/* + * From uapi/linux/pr.h + */ +TRACE_DEFINE_ENUM(PR_STS_SUCCESS); +TRACE_DEFINE_ENUM(PR_STS_IOERR); +TRACE_DEFINE_ENUM(PR_STS_RESERVATION_CONFLICT); +TRACE_DEFINE_ENUM(PR_STS_RETRY_PATH_FAILURE); +TRACE_DEFINE_ENUM(PR_STS_PATH_FAST_FAILED); +TRACE_DEFINE_ENUM(PR_STS_PATH_FAILED); + +#define show_pr_status(x) \ + __print_symbolic(x, \ + { PR_STS_SUCCESS, "SUCCESS" }, \ + { PR_STS_IOERR, "IOERR" }, \ + { PR_STS_RESERVATION_CONFLICT, "RESERVATION_CONFLICT" }, \ + { PR_STS_RETRY_PATH_FAILURE, "RETRY_PATH_FAILURE" }, \ + { PR_STS_PATH_FAST_FAILED, "PATH_FAST_FAILED" }, \ + { PR_STS_PATH_FAILED, "PATH_FAILED" }) + +DECLARE_EVENT_CLASS(pnfs_bl_pr_key_err_class, + TP_PROTO( + const struct block_device *bdev, + u64 key, + int status + ), + TP_ARGS(bdev, key, status), + TP_STRUCT__entry( + __field(u64, key) + __field(dev_t, dev) + __field(unsigned long, status) + __string(device, bdev->bd_disk->disk_name) + ), + TP_fast_assign( + __entry->key = key; + __entry->dev = bdev->bd_dev; + __entry->status = status; + __assign_str(device); + ), + TP_printk("dev=%d,%d (%s) key=0x%016llx status=%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __get_str(device), __entry->key, + show_pr_status(__entry->status) + ) +); + +#define DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(name) \ + DEFINE_EVENT(pnfs_bl_pr_key_err_class, name, \ + TP_PROTO( \ + const struct block_device *bdev, \ + u64 key, \ + int status \ + ), \ + TP_ARGS(bdev, key, status)) +DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_reg_err); +DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_unreg_err); + #ifdef CONFIG_NFS_V4_2 TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA); TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE); -- cgit v1.2.3 From 37d4159dd25ade59ce0fecc75984240e5f7abc14 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Jul 2024 12:50:46 +0200 Subject: nfs: Drop pointless check from nfs_commit_release_pages() nfss->writeback is updated only when we are ending page writeback and at that moment we also clear nfss->write_congested. So there's no point in rechecking congestion state in nfs_commit_release_pages(). Drop the pointless check. Reviewed-by: Sagi Grimberg Reviewed-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f5414c96381a..4321cdc581bb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1846,7 +1846,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; - struct nfs_server *nfss; struct folio *folio; while (!list_empty(&data->pages)) { @@ -1889,9 +1888,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Latency breaker */ cond_resched(); } - nfss = NFS_SERVER(data->inode); - if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - nfss->write_congested = 0; nfs_init_cinfo(&cinfo, data->inode, data->dreq); nfs_commit_end(cinfo.mds); -- cgit v1.2.3 From f8a3955083f5cc5d62257c6e1103f89f566d3a87 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Jul 2024 12:50:47 +0200 Subject: nfs: Properly initialize server->writeback Atomic types should better be initialized with atomic_long_set() instead of relying on zeroing done by kzalloc(). Clean this up. Reviewed-by: Sagi Grimberg Signed-off-by: Jan Kara Acked-by: Jeff Layton Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index de77848ae654..3b252dceebf5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -994,6 +994,8 @@ struct nfs_server *nfs_alloc_server(void) server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; + atomic_long_set(&server->writeback, 0); + ida_init(&server->openowner_id); ida_init(&server->lockowner_id); pnfs_init_server(server); -- cgit v1.2.3 From 2f1f31042ef07719a0d5cb4784b8a32d20c13110 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Jul 2024 12:50:48 +0200 Subject: nfs: Block on write congestion Commit 6df25e58532b ("nfs: remove reliance on bdi congestion") introduced NFS-private solution for limiting number of writes outstanding against a particular server. Unlike previous bdi congestion this algorithm actually works and limits number of outstanding writeback pages to nfs_congestion_kb which scales with amount of client's memory and is capped at 256 MB. As a result some workloads such as random buffered writes over NFS got slower (from ~170 MB/s to ~126 MB/s). The fio command to reproduce is: fio --direct=0 --ioengine=sync --thread --invalidate=1 --group_reporting=1 --runtime=300 --fallocate=posix --ramp_time=10 --new_group --rw=randwrite --size=64256m --numjobs=4 --bs=4k --fsync_on_close=1 --end_fsync=1 This happens because the client sends ~256 MB worth of dirty pages to the server and any further background writeback request is ignored until the number of writeback pages gets below the threshold of 192 MB. By the time this happens and clients decides to trigger another round of writeback, the server often has no pages to write and the disk is idle. To fix this problem and make the client react faster to eased congestion of the server by blocking waiting for congestion to resolve instead of aborting writeback. This improves the random 4k buffered write throughput to 184 MB/s. Reviewed-by: Sagi Grimberg Reviewed-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 1 + fs/nfs/write.c | 15 +++++++++++---- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 3b252dceebf5..8286edd6062d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -994,6 +994,7 @@ struct nfs_server *nfs_alloc_server(void) server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; + init_waitqueue_head(&server->write_congestion_wait); atomic_long_set(&server->writeback, 0); ida_init(&server->openowner_id); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4321cdc581bb..81845ab2e00a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -425,8 +425,10 @@ static void nfs_folio_end_writeback(struct folio *folio) folio_end_writeback(folio); if (atomic_long_dec_return(&nfss->writeback) < - NFS_CONGESTION_OFF_THRESH) + NFS_CONGESTION_OFF_THRESH) { nfss->write_congested = 0; + wake_up_all(&nfss->write_congestion_wait); + } } static void nfs_page_end_writeback(struct nfs_page *req) @@ -700,12 +702,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct nfs_pageio_descriptor pgio; struct nfs_io_completion *ioc = NULL; unsigned int mntflags = NFS_SERVER(inode)->flags; + struct nfs_server *nfss = NFS_SERVER(inode); int priority = 0; int err; - if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) - return 0; + /* Wait with writeback until write congestion eases */ + if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) { + err = wait_event_killable(nfss->write_congestion_wait, + nfss->write_congested == 0); + if (err) + return err; + } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ba9df1848b35..1df86ab98c77 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -140,6 +140,7 @@ struct nfs_server { struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nlm_host *nlm_host; /* NLM client handle */ struct nfs_iostats __percpu *io_stats; /* I/O statistics */ + wait_queue_head_t write_congestion_wait; /* wait until write congestion eases */ atomic_long_t writeback; /* number of writeback pages */ unsigned int write_congested;/* flag set when writeback gets too high */ unsigned int flags; /* various flags */ -- cgit v1.2.3 From 4840c00003a2275668a13b82c9f5b1aed80183aa Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 24 Jun 2024 09:28:27 -0400 Subject: NFSv4.1 another fix for EXCHGID4_FLAG_USE_PNFS_DS for DS server Previously in order to mark the communication with the DS server, we tried to use NFS_CS_DS in cl_flags. However, this flag would only be saved for the DS server and in case where DS equals MDS, the client would not find a matching nfs_client in nfs_match_client that represents the MDS (but is also a DS). Instead, don't rely on the NFS_CS_DS but instead use NFS_CS_PNFS. Fixes: 379e4adfddd6 ("NFSv4.1: fixup use EXCHGID4_FLAG_USE_PNFS_DS for DS server") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 6 ++---- fs/nfs/nfs4proc.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 84573df5cf5a..83378f69b35e 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -231,9 +231,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); - - if (test_bit(NFS_CS_DS, &cl_init->init_flags)) - __set_bit(NFS_CS_DS, &clp->cl_flags); + if (test_bit(NFS_CS_PNFS, &cl_init->init_flags)) + __set_bit(NFS_CS_PNFS, &clp->cl_flags); /* * Set up the connection to the server before we add add to the * global list. @@ -1013,7 +1012,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); - __set_bit(NFS_CS_DS, &cl_init.init_flags); __set_bit(NFS_CS_PNFS, &cl_init.init_flags); cl_init.max_connect = NFS_MAX_TRANSPORTS; /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8e89b5e0ce31..8883016c551c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8959,7 +8959,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, #ifdef CONFIG_NFS_V4_1_MIGRATION calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR; #endif - if (test_bit(NFS_CS_DS, &clp->cl_flags)) + if (test_bit(NFS_CS_PNFS, &clp->cl_flags)) calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS; msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; -- cgit v1.2.3 From 7e8e78a0ba00c88f0ded86de64bdddc82e06b196 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:48 +0200 Subject: nfs: remove dead code for the old swap over NFS implementation Remove the code testing folio_test_swapcache either explicitly or implicitly in pagemap.h headers, as is now handled using the direct I/O path and not the buffered I/O path that these helpers are located in. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 6 +- fs/nfs/filelayout/filelayout.c | 1 - fs/nfs/fscache.c | 2 +- fs/nfs/internal.h | 8 +-- fs/nfs/pagelist.c | 2 +- fs/nfs/pnfs.h | 22 ------- fs/nfs/pnfs_nfs.c | 47 --------------- fs/nfs/read.c | 2 +- fs/nfs/write.c | 128 ++++++++--------------------------------- include/linux/nfs_page.h | 4 +- 10 files changed, 36 insertions(+), 186 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 834e612262e6..0e2f87120cb8 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -427,7 +427,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, static void nfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", folio->index, offset, length); @@ -454,7 +454,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp) if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || current_is_kswapd()) return false; - if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0) + if (nfs_wb_folio(folio->mapping->host, folio) < 0) return false; } return nfs_fscache_release_folio(folio, gfp); @@ -606,7 +606,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); folio_lock(folio); - mapping = folio_file_mapping(folio); + mapping = folio->mapping; if (mapping != inode->i_mapping) goto out_unlock; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 29d84dc66ca3..b6e9aeaf4ce2 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1110,7 +1110,6 @@ static const struct pnfs_commit_ops filelayout_commit_ops = { .clear_request_commit = pnfs_generic_clear_request_commit, .scan_commit_lists = pnfs_generic_scan_commit_lists, .recover_commit_reqs = pnfs_generic_recover_commit_reqs, - .search_commit_reqs = pnfs_generic_search_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, }; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ddc1ee031955..7202ce84d0eb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -341,7 +341,7 @@ void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr) int nfs_netfs_folio_unlock(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; /* * If fscache is enabled, netfs will unlock pages. diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9f0f4534744b..87ebc4608c31 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -785,7 +785,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, struct nfs_commit_info *cinfo) { if (folio && !cinfo->dreq) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; long nr = folio_nr_pages(folio); /* This page is really still in write-back - just that the @@ -803,7 +803,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, static inline unsigned int nfs_page_length(struct page *page) { - loff_t i_size = i_size_read(page_file_mapping(page)->host); + loff_t i_size = i_size_read(page->mapping->host); if (i_size > 0) { pgoff_t index = page_index(page); @@ -821,10 +821,10 @@ unsigned int nfs_page_length(struct page *page) */ static inline size_t nfs_folio_length(struct folio *folio) { - loff_t i_size = i_size_read(folio_file_mapping(folio)->host); + loff_t i_size = i_size_read(folio->mapping->host); if (i_size > 0) { - pgoff_t index = folio_index(folio) >> folio_order(folio); + pgoff_t index = folio->index >> folio_order(folio); pgoff_t end_index = (i_size - 1) >> folio_shift(folio); if (index < end_index) return folio_size(folio); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 040b6b79c75e..3b006bcbcc87 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -569,7 +569,7 @@ struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, if (IS_ERR(l_ctx)) return ERR_CAST(l_ctx); - ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count); + ret = nfs_page_create(l_ctx, offset, folio->index, offset, count); if (!IS_ERR(ret)) { nfs_page_assign_folio(ret, folio); nfs_page_group_init(ret, NULL); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bb5142b4e67a..1fc40afcbf1f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -199,8 +199,6 @@ struct pnfs_commit_ops { int max); void (*recover_commit_reqs) (struct list_head *list, struct nfs_commit_info *cinfo); - struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo, - struct folio *folio); }; struct pnfs_layout_hdr { @@ -409,8 +407,6 @@ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data); void pnfs_generic_rw_release(void *data); void pnfs_generic_recover_commit_reqs(struct list_head *dst, struct nfs_commit_info *cinfo); -struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct folio *folio); int pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, int how, @@ -570,17 +566,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) fl_cinfo->ops->recover_commit_reqs(head, cinfo); } -static inline struct nfs_page * -pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct folio *folio) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - - if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs) - return NULL; - return fl_cinfo->ops->search_commit_reqs(cinfo, folio); -} - /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -882,13 +867,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) { } -static inline struct nfs_page * -pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct folio *folio) -{ - return NULL; -} - static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 88e061bd711b..a74ee69a2fa6 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -351,53 +351,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); -static struct nfs_page * -pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets, - unsigned int nbuckets, struct folio *folio) -{ - struct nfs_page *req; - struct pnfs_commit_bucket *b; - unsigned int i; - - /* Linearly search the commit lists for each bucket until a matching - * request is found */ - for (i = 0, b = buckets; i < nbuckets; i++, b++) { - list_for_each_entry(req, &b->written, wb_list) { - if (nfs_page_to_folio(req) == folio) - return req->wb_head; - } - list_for_each_entry(req, &b->committing, wb_list) { - if (nfs_page_to_folio(req) == folio) - return req->wb_head; - } - } - return NULL; -} - -/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request - * for @folio - * @cinfo - commit info for current inode - * @folio - page to search for matching head request - * - * Return: the head request if one is found, otherwise %NULL. - */ -struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct folio *folio) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - struct pnfs_commit_array *array; - struct nfs_page *req; - - list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) { - req = pnfs_bucket_search_commit_reqs(array->buckets, - array->nbuckets, folio); - if (req) - return req; - } - return NULL; -} -EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs); - static struct pnfs_layout_segment * pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 1b0e06c11983..036ede4875ca 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -289,7 +289,7 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_server *server = NFS_SERVER(inode); size_t fsize = folio_size(folio); unsigned int rsize = server->rsize; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 81845ab2e00a..dc432f581fa7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,9 +63,6 @@ static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct folio *folio); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -178,16 +175,16 @@ static struct nfs_page *nfs_folio_private_request(struct folio *folio) } /** - * nfs_folio_find_private_request - find head request associated with a folio + * nfs_folio_find_head_request - find head request associated with a folio * @folio: pointer to folio * * must be called while holding the inode lock. * * returns matching head request with reference held, or NULL if not found. */ -static struct nfs_page *nfs_folio_find_private_request(struct folio *folio) +static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct nfs_page *req; if (!folio_test_private(folio)) @@ -202,45 +199,9 @@ static struct nfs_page *nfs_folio_find_private_request(struct folio *folio) return req; } -static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio) -{ - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req = NULL; - if (!folio_test_swapcache(folio)) - return NULL; - mutex_lock(&nfsi->commit_mutex); - if (folio_test_swapcache(folio)) { - req = nfs_page_search_commits_for_head_request_locked(nfsi, - folio); - if (req) { - WARN_ON_ONCE(req->wb_head != req); - kref_get(&req->wb_kref); - } - } - mutex_unlock(&nfsi->commit_mutex); - return req; -} - -/** - * nfs_folio_find_head_request - find head request associated with a folio - * @folio: pointer to folio - * - * returns matching head request with reference held, or NULL if not found. - */ -static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) -{ - struct nfs_page *req; - - req = nfs_folio_find_private_request(folio); - if (!req) - req = nfs_folio_find_swap_request(folio); - return req; -} - static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_page *req, *head; int ret; @@ -261,8 +222,6 @@ static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) /* Ensure that nobody removed the request before we locked it */ if (head == nfs_folio_private_request(folio)) break; - if (folio_test_swapcache(folio)) - break; nfs_unlock_and_release_request(head); } return head; @@ -272,14 +231,14 @@ static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) static void nfs_grow_file(struct folio *folio, unsigned int offset, unsigned int count) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio); - if (i_size > 0 && folio_index(folio) < end_index) + if (i_size > 0 && folio->index < end_index) goto out; end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count; if (i_size >= end) @@ -311,7 +270,7 @@ static void nfs_set_pageerror(struct address_space *mapping) static void nfs_mapping_set_error(struct folio *folio, int error) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; folio_set_error(folio); filemap_set_wb_err(mapping, error); @@ -412,7 +371,7 @@ int nfs_congestion_kb; static void nfs_folio_set_writeback(struct folio *folio) { - struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); + struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); folio_start_writeback(folio); if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) @@ -421,7 +380,7 @@ static void nfs_folio_set_writeback(struct folio *folio) static void nfs_folio_end_writeback(struct folio *folio) { - struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); + struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); folio_end_writeback(folio); if (atomic_long_dec_return(&nfss->writeback) < @@ -569,7 +528,7 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, */ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_page *head; struct nfs_commit_info cinfo; int ret; @@ -645,7 +604,7 @@ static int nfs_page_async_flush(struct folio *folio, nfs_redirty_request(req); pgio->pg_error = 0; } else - nfs_add_stats(folio_file_mapping(folio)->host, + nfs_add_stats(folio->mapping->host, NFSIOS_WRITEPAGES, 1); out: return ret; @@ -657,7 +616,7 @@ out_launder: static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - nfs_pageio_cond_complete(pgio, folio_index(folio)); + nfs_pageio_cond_complete(pgio, folio->index); return nfs_page_async_flush(folio, wbc, pgio); } @@ -668,7 +627,7 @@ static int nfs_writepage_locked(struct folio *folio, struct writeback_control *wbc) { struct nfs_pageio_descriptor pgio; - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); @@ -751,24 +710,17 @@ out_err: static void nfs_inode_add_request(struct nfs_page *req) { struct folio *folio = nfs_page_to_folio(req); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct nfs_inode *nfsi = NFS_I(mapping->host); WARN_ON_ONCE(req->wb_this_page != req); /* Lock the request! */ nfs_lock_request(req); - - /* - * Swap-space should not get truncated. Hence no need to plug the race - * with invalidate/truncate. - */ spin_lock(&mapping->i_private_lock); - if (likely(!folio_test_swapcache(folio))) { - set_bit(PG_MAPPED, &req->wb_flags); - folio_set_private(folio); - folio->private = req; - } + set_bit(PG_MAPPED, &req->wb_flags); + folio_set_private(folio); + folio->private = req; spin_unlock(&mapping->i_private_lock); atomic_long_inc(&nfsi->nrequests); /* this a head request for a page group - mark it as having an @@ -788,10 +740,10 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; spin_lock(&mapping->i_private_lock); - if (likely(folio && !folio_test_swapcache(folio))) { + if (likely(folio)) { folio->private = NULL; folio_clear_private(folio); clear_bit(PG_MAPPED, &req->wb_head->wb_flags); @@ -812,38 +764,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req) filemap_dirty_folio(folio_mapping(folio), folio); } -/* - * nfs_page_search_commits_for_head_request_locked - * - * Search through commit lists on @inode for the head request for @folio. - * Must be called while holding the inode (which is cinfo) lock. - * - * Returns the head request if found, or NULL if not found. - */ -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct folio *folio) -{ - struct nfs_page *freq, *t; - struct nfs_commit_info cinfo; - struct inode *inode = &nfsi->vfs_inode; - - nfs_init_cinfo_from_inode(&cinfo, inode); - - /* search through pnfs commit lists */ - freq = pnfs_search_commit_reqs(inode, &cinfo, folio); - if (freq) - return freq->wb_head; - - /* Linearly search the commit list for the correct request */ - list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { - if (nfs_page_to_folio(freq) == folio) - return freq->wb_head; - } - - return NULL; -} - /** * nfs_request_add_commit_list_locked - add request to a commit list * @req: pointer to a struct nfs_page @@ -950,7 +870,7 @@ static void nfs_folio_clear_commit(struct folio *folio) long nr = folio_nr_pages(folio); node_stat_mod_folio(folio, NR_WRITEBACK, -nr); - wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb, + wb_stat_mod(&inode_to_bdi(folio->mapping->host)->wb, WB_WRITEBACK, -nr); } } @@ -1135,7 +1055,7 @@ out_flushme: */ nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); - error = nfs_wb_folio(folio_file_mapping(folio)->host, folio); + error = nfs_wb_folio(folio->mapping->host, folio); return (error < 0) ? ERR_PTR(error) : NULL; } @@ -1211,7 +1131,7 @@ int nfs_flush_incompatible(struct file *file, struct folio *folio) nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_folio(folio_file_mapping(folio)->host, folio); + status = nfs_wb_folio(folio->mapping->host, folio); } while (status == 0); return status; } @@ -1285,7 +1205,7 @@ out: */ static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_inode *nfsi = NFS_I(inode); if (nfs_have_delegated_attributes(inode)) @@ -1363,7 +1283,7 @@ int nfs_update_folio(struct file *file, struct folio *folio, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; unsigned int pagelen = nfs_folio_length(folio); int status = 0; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 1c315f854ea8..7bc31df457ea 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -208,8 +208,8 @@ static inline struct inode *nfs_page_to_inode(const struct nfs_page *req) struct folio *folio = nfs_page_to_folio(req); if (folio == NULL) - return page_file_mapping(req->wb_page)->host; - return folio_file_mapping(folio)->host; + return req->wb_page->mapping->host; + return folio->mapping->host; } /** -- cgit v1.2.3 From 02e61ec1e2c1da136bbf7f6bbabc46733c53b035 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:49 +0200 Subject: nfs: remove nfs_folio_private_request nfs_folio_private_request is a trivial wrapper around, which itself has fallen out of favor and has been replaced with plain ->private dereferences in recent folio conversions. Do the same for nfs. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc432f581fa7..a56bb49af55a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -169,11 +169,6 @@ nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) return 0; } -static struct nfs_page *nfs_folio_private_request(struct folio *folio) -{ - return folio_get_private(folio); -} - /** * nfs_folio_find_head_request - find head request associated with a folio * @folio: pointer to folio @@ -190,7 +185,7 @@ static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) if (!folio_test_private(folio)) return NULL; spin_lock(&mapping->i_private_lock); - req = nfs_folio_private_request(folio); + req = folio->private; if (req) { WARN_ON_ONCE(req->wb_head != req); kref_get(&req->wb_kref); @@ -220,7 +215,7 @@ static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) return ERR_PTR(ret); } /* Ensure that nobody removed the request before we locked it */ - if (head == nfs_folio_private_request(folio)) + if (head == folio->private) break; nfs_unlock_and_release_request(head); } -- cgit v1.2.3 From 9eb7c484db1ae993648fc9b9d48a295f4d99afb8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:50 +0200 Subject: nfs: simplify nfs_folio_find_and_lock_request nfs_folio_find_and_lock_request and the nfs_page_group_lock_head helper called by it spend quite some effort to deal with head vs subrequests. But given that only the head request can be stashed in the folio private data, non of that is required. Fold the locking logic from nfs_page_group_lock_head into nfs_folio_find_and_lock_request and simplify the result based on the invariant that we always find the head request in the folio private data. Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 19 ------------------- fs/nfs/write.c | 38 +++++++++++++++++++++----------------- include/linux/nfs_page.h | 1 - 3 files changed, 21 insertions(+), 37 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 3b006bcbcc87..e48cc69a2361 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -187,25 +187,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) } EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); -/* - * nfs_page_lock_head_request - page lock the head of the page group - * @req: any member of the page group - */ -struct nfs_page * -nfs_page_group_lock_head(struct nfs_page *req) -{ - struct nfs_page *head = req->wb_head; - - while (!nfs_lock_request(head)) { - int ret = nfs_wait_on_request(head); - if (ret < 0) - return ERR_PTR(ret); - } - if (head != req) - kref_get(&head->wb_kref); - return head; -} - /* * nfs_unroll_locks - unlock all newly locked reqs and wait on @req * @head: head request of page group, must be holding head lock diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a56bb49af55a..69336bca26f5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -197,28 +197,32 @@ static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) { struct inode *inode = folio->mapping->host; - struct nfs_page *req, *head; + struct nfs_page *head; int ret; - for (;;) { - req = nfs_folio_find_head_request(folio); - if (!req) - return req; - head = nfs_page_group_lock_head(req); - if (head != req) - nfs_release_request(req); - if (IS_ERR(head)) - return head; - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) { - nfs_unlock_and_release_request(head); +retry: + head = nfs_folio_find_head_request(folio); + if (!head) + return NULL; + + while (!nfs_lock_request(head)) { + ret = nfs_wait_on_request(head); + if (ret < 0) return ERR_PTR(ret); - } - /* Ensure that nobody removed the request before we locked it */ - if (head == folio->private) - break; + } + + /* Ensure that nobody removed the request before we locked it */ + if (head != folio->private) { nfs_unlock_and_release_request(head); + goto retry; } + + ret = nfs_cancel_remove_inode(head, inode); + if (ret < 0) { + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); + } + return head; } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7bc31df457ea..e799d93626f1 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -155,7 +155,6 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, -- cgit v1.2.3 From c3f2235782c395896e835650f25f985713146592 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:51 +0200 Subject: nfs: fold nfs_folio_find_and_lock_request into nfs_lock_and_join_requests Fold nfs_folio_find_and_lock_request into the only caller to prepare for changes to this code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 68 +++++++++++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 41 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 69336bca26f5..93833f1dcbad 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -194,38 +194,6 @@ static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) return req; } -static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) -{ - struct inode *inode = folio->mapping->host; - struct nfs_page *head; - int ret; - -retry: - head = nfs_folio_find_head_request(folio); - if (!head) - return NULL; - - while (!nfs_lock_request(head)) { - ret = nfs_wait_on_request(head); - if (ret < 0) - return ERR_PTR(ret); - } - - /* Ensure that nobody removed the request before we locked it */ - if (head != folio->private) { - nfs_unlock_and_release_request(head); - goto retry; - } - - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) { - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); - } - - return head; -} - /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct folio *folio, unsigned int offset, unsigned int count) @@ -532,26 +500,44 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) struct nfs_commit_info cinfo; int ret; - nfs_init_cinfo_from_inode(&cinfo, inode); /* * A reference is taken only on the head request which acts as a * reference to the whole page group - the group will not be destroyed * until the head reference is released. */ - head = nfs_folio_find_and_lock_request(folio); - if (IS_ERR_OR_NULL(head)) - return head; +retry: + head = nfs_folio_find_head_request(folio); + if (!head) + return NULL; - /* lock each request in the page group */ - ret = nfs_page_group_lock_subrequests(head); - if (ret < 0) { + while (!nfs_lock_request(head)) { + ret = nfs_wait_on_request(head); + if (ret < 0) + return ERR_PTR(ret); + } + + /* Ensure that nobody removed the request before we locked it */ + if (head != folio->private) { nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + goto retry; } - nfs_join_page_group(head, &cinfo, inode); + ret = nfs_cancel_remove_inode(head, inode); + if (ret < 0) + goto out_unlock; + /* lock each request in the page group */ + ret = nfs_page_group_lock_subrequests(head); + if (ret < 0) + goto out_unlock; + + nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_join_page_group(head, &cinfo, inode); return head; + +out_unlock: + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); } static void nfs_write_error(struct nfs_page *req, int error) -- cgit v1.2.3 From 25edbcac6e32eab345e470d56ca9974a577b878b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:52 +0200 Subject: nfs: fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests Fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests to prepare for future changes to this code, and move the helpers to write.c as well. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 77 ------------------------------------------------ fs/nfs/write.c | 67 +++++++++++++++++++++++++++++++++++++++-- include/linux/nfs_page.h | 1 - 3 files changed, 64 insertions(+), 81 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e48cc69a2361..fa7971072900 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -187,83 +187,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) } EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); -/* - * nfs_unroll_locks - unlock all newly locked reqs and wait on @req - * @head: head request of page group, must be holding head lock - * @req: request that couldn't lock and needs to wait on the req bit lock - * - * This is a helper function for nfs_lock_and_join_requests - * returns 0 on success, < 0 on error. - */ -static void -nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) -{ - struct nfs_page *tmp; - - /* relinquish all the locks successfully grabbed this run */ - for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { - if (!kref_read(&tmp->wb_kref)) - continue; - nfs_unlock_and_release_request(tmp); - } -} - -/* - * nfs_page_group_lock_subreq - try to lock a subrequest - * @head: head request of page group - * @subreq: request to lock - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request and page group both locked. - * On error, it returns with the page group unlocked. - */ -static int -nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) -{ - int ret; - - if (!kref_get_unless_zero(&subreq->wb_kref)) - return 0; - while (!nfs_lock_request(subreq)) { - nfs_page_group_unlock(head); - ret = nfs_wait_on_request(subreq); - if (!ret) - ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unroll_locks(head, subreq); - nfs_release_request(subreq); - return ret; - } - } - return 0; -} - -/* - * nfs_page_group_lock_subrequests - try to lock the subrequests - * @head: head request of page group - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request locked. - */ -int nfs_page_group_lock_subrequests(struct nfs_page *head) -{ - struct nfs_page *subreq; - int ret; - - ret = nfs_page_group_lock(head); - if (ret < 0) - return ret; - /* lock each request in the page group */ - for (subreq = head->wb_this_page; subreq != head; - subreq = subreq->wb_this_page) { - ret = nfs_page_group_lock_subreq(head, subreq); - if (ret < 0) - return ret; - } - nfs_page_group_unlock(head); - return 0; -} - /* * nfs_page_set_headlock - set the request PG_HEADLOCK * @req: request that is to be locked diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 93833f1dcbad..0fe9d7bf34db 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -478,6 +478,57 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, nfs_destroy_unlinked_subrequests(destroy_list, head, inode); } +/* + * nfs_unroll_locks - unlock all newly locked reqs and wait on @req + * @head: head request of page group, must be holding head lock + * @req: request that couldn't lock and needs to wait on the req bit lock + * + * This is a helper function for nfs_lock_and_join_requests + * returns 0 on success, < 0 on error. + */ +static void +nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) +{ + struct nfs_page *tmp; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { + if (!kref_read(&tmp->wb_kref)) + continue; + nfs_unlock_and_release_request(tmp); + } +} + +/* + * nfs_page_group_lock_subreq - try to lock a subrequest + * @head: head request of page group + * @subreq: request to lock + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. + */ +static int +nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) +{ + int ret; + + if (!kref_get_unless_zero(&subreq->wb_kref)) + return 0; + while (!nfs_lock_request(subreq)) { + nfs_page_group_unlock(head); + ret = nfs_wait_on_request(subreq); + if (!ret) + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unroll_locks(head, subreq); + nfs_release_request(subreq); + return ret; + } + } + return 0; +} + /* * nfs_lock_and_join_requests - join all subreqs to the head req * @folio: the folio used to lookup the "page group" of nfs_page structures @@ -496,7 +547,7 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { struct inode *inode = folio->mapping->host; - struct nfs_page *head; + struct nfs_page *head, *subreq; struct nfs_commit_info cinfo; int ret; @@ -526,11 +577,21 @@ retry: if (ret < 0) goto out_unlock; - /* lock each request in the page group */ - ret = nfs_page_group_lock_subrequests(head); + ret = nfs_page_group_lock(head); if (ret < 0) goto out_unlock; + /* lock each request in the page group */ + for (subreq = head->wb_this_page; + subreq != head; + subreq = subreq->wb_this_page) { + ret = nfs_page_group_lock_subreq(head, subreq); + if (ret < 0) + goto out_unlock; + } + + nfs_page_group_unlock(head); + nfs_init_cinfo_from_inode(&cinfo, inode); nfs_join_page_group(head, &cinfo, inode); return head; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index e799d93626f1..63eed97a18ad 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -155,7 +155,6 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, struct inode *inode); -- cgit v1.2.3 From f1b7c7552cbcf89e56b15ff481f3d19b53046291 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:53 +0200 Subject: nfs: move nfs_wait_on_request to write.c nfs_wait_on_request is now only used in write.c. Move it there and mark it static. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 19 ------------------- fs/nfs/write.c | 17 +++++++++++++++++ include/linux/nfs_page.h | 1 - 3 files changed, 17 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fa7971072900..04124f226665 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -598,25 +598,6 @@ void nfs_release_request(struct nfs_page *req) } EXPORT_SYMBOL_GPL(nfs_release_request); -/** - * nfs_wait_on_request - Wait for a request to complete. - * @req: request to wait upon. - * - * Interruptible by fatal signals only. - * The user is responsible for holding a count on the request. - */ -int -nfs_wait_on_request(struct nfs_page *req) -{ - if (!test_bit(PG_BUSY, &req->wb_flags)) - return 0; - set_bit(PG_CONTENDED2, &req->wb_flags); - smp_mb__after_atomic(); - return wait_on_bit_io(&req->wb_flags, PG_BUSY, - TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL_GPL(nfs_wait_on_request); - /* * nfs_generic_pg_test - determine if requests can be coalesced * @desc: pointer to descriptor diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0fe9d7bf34db..089c242efac0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -478,6 +478,23 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, nfs_destroy_unlinked_subrequests(destroy_list, head, inode); } +/** + * nfs_wait_on_request - Wait for a request to complete. + * @req: request to wait upon. + * + * Interruptible by fatal signals only. + * The user is responsible for holding a count on the request. + */ +static int nfs_wait_on_request(struct nfs_page *req) +{ + if (!test_bit(PG_BUSY, &req->wb_flags)) + return 0; + set_bit(PG_CONTENDED2, &req->wb_flags); + smp_mb__after_atomic(); + return wait_on_bit_io(&req->wb_flags, PG_BUSY, + TASK_UNINTERRUPTIBLE); +} + /* * nfs_unroll_locks - unlock all newly locked reqs and wait on @req * @head: head request of page group, must be holding head lock diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 63eed97a18ad..169b4ae30ff4 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -152,7 +152,6 @@ extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req); -extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern void nfs_join_page_group(struct nfs_page *head, -- cgit v1.2.3 From b571cfcb9dcac187c6d967987792d37cb0688610 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:54 +0200 Subject: nfs: don't reuse partially completed requests in nfs_lock_and_join_requests When NFS requests are split into sub-requests, nfs_inode_remove_request calls nfs_page_group_sync_on_bit to set PG_REMOVE on this sub-request and only completes the head requests once PG_REMOVE is set on all requests. This means that when nfs_lock_and_join_requests sees a PG_REMOVE bit, I/O on the request is in progress and has partially completed. If such a request is returned to nfs_try_to_update_request, it could be extended with the newly dirtied region and I/O for the combined range will be re-scheduled, leading to extra I/O. Change the logic to instead restart the search for a request when any PG_REMOVE bit is set, as the completion handler will remove the request as soon as it can take the page group lock. This not only avoid extending the I/O but also does the right thing for the callers that want to cancel or flush the request. Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 49 ++++++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 29 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 089c242efac0..f0a124b45291 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -144,31 +144,6 @@ static void nfs_io_completion_put(struct nfs_io_completion *ioc) kref_put(&ioc->refcount, nfs_io_completion_release); } -static void -nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) -{ - if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) { - kref_get(&req->wb_kref); - atomic_long_inc(&NFS_I(inode)->nrequests); - } -} - -static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) -{ - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; - if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) - nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; -} - /** * nfs_folio_find_head_request - find head request associated with a folio * @folio: pointer to folio @@ -566,6 +541,7 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) struct inode *inode = folio->mapping->host; struct nfs_page *head, *subreq; struct nfs_commit_info cinfo; + bool removed; int ret; /* @@ -590,18 +566,18 @@ retry: goto retry; } - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) - goto out_unlock; - ret = nfs_page_group_lock(head); if (ret < 0) goto out_unlock; + removed = test_bit(PG_REMOVE, &head->wb_flags); + /* lock each request in the page group */ for (subreq = head->wb_this_page; subreq != head; subreq = subreq->wb_this_page) { + if (test_bit(PG_REMOVE, &subreq->wb_flags)) + removed = true; ret = nfs_page_group_lock_subreq(head, subreq); if (ret < 0) goto out_unlock; @@ -609,6 +585,21 @@ retry: nfs_page_group_unlock(head); + /* + * If PG_REMOVE is set on any request, I/O on that request has + * completed, but some requests were still under I/O at the time + * we locked the head request. + * + * In that case the above wait for all requests means that all I/O + * has now finished, and we can restart from a clean slate. Let the + * old requests go away and start from scratch instead. + */ + if (removed) { + nfs_unroll_locks(head, head); + nfs_unlock_and_release_request(head); + goto retry; + } + nfs_init_cinfo_from_inode(&cinfo, inode); nfs_join_page_group(head, &cinfo, inode); return head; -- cgit v1.2.3 From b1043a3304d24e1be39e18dcb3fdee5220561ef1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Jul 2024 15:32:59 +0200 Subject: nfs: remove the unused max_deviceinfo_size field from struct pnfs_layoutdriver_type max_deviceinfo_size is not set anywhere, remove it. Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.h | 1 - fs/nfs/pnfs_dev.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1fc40afcbf1f..30d2613e912b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -133,7 +133,6 @@ struct pnfs_layoutdriver_type { const char *name; struct module *owner; unsigned flags; - unsigned max_deviceinfo_size; unsigned max_layoutget_response; int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 178001c90156..bf0f2d67e96c 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -110,9 +110,6 @@ nfs4_get_device_info(struct nfs_server *server, * GETDEVICEINFO's maxcount */ max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - if (server->pnfs_curr_ld->max_deviceinfo_size && - server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz) - max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size; max_pages = nfs_page_array_len(0, max_resp_sz); dprintk("%s: server %p max_resp_sz %u max_pages %d\n", __func__, server, max_resp_sz, max_pages); -- cgit v1.2.3 From 7f296b25f2a6453bf052b03ed0676a18bee312a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Jul 2024 07:38:38 +0200 Subject: nfs: remove nfs_page_length The nfs_page_length is not used anywhere, remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 87ebc4608c31..5902a9beca1f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -797,25 +797,6 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, } } -/* - * Determine the number of bytes of data the page contains - */ -static inline -unsigned int nfs_page_length(struct page *page) -{ - loff_t i_size = i_size_read(page->mapping->host); - - if (i_size > 0) { - pgoff_t index = page_index(page); - pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT; - if (index < end_index) - return PAGE_SIZE; - if (index == end_index) - return ((i_size - 1) & ~PAGE_MASK) + 1; - } - return 0; -} - /* * Determine the number of bytes of data the page contains */ -- cgit v1.2.3 From 3921ae0850a31f06791e3c5d8f628864b5ae088a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Jul 2024 18:51:41 +0200 Subject: nfs/blocklayout: add support for NVMe Look for the udev generated persistent device name for NVMe devices in addition to the SCSI ones and the Redhat-specific device mapper name. This is the client side implementation of RFC 9561 "Using the Parallel NFS (pNFS) SCSI Layout to Access Non-Volatile Memory Express (NVMe) Storage Devices". Note that the udev rules for nvme are a bit of a mess and udev will only create a link for the uuid if the NVMe namespace has one, and not the NGUID. As the current RFCs don't support UUID based identifications this means the layout can't be used on such namespaces out of the box. A small tweak to the udev rules can work around it, and as the real fix I will submit a draft to the IETF NFSv4 working group to support UUID-based identifiers for SCSI and NVMe. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 221781fb9cf1..6252f4447945 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -404,6 +404,8 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, bdev_file = bl_open_path(v, "dm-uuid-mpath-0x"); if (IS_ERR(bdev_file)) bdev_file = bl_open_path(v, "wwn-0x"); + if (IS_ERR(bdev_file)) + bdev_file = bl_open_path(v, "nvme-eui."); if (IS_ERR(bdev_file)) { pr_warn("pNFS: no device found for volume %*phN\n", v->scsi.designator_len, v->scsi.designator); -- cgit v1.2.3 From 39c910a430370fd25d5b5e4b2f4b24581a705499 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Jul 2024 07:42:51 +0200 Subject: nfs: do not extend writes to the entire folio nfs_update_folio has code to extend a write to the entire page under certain conditions. With the support for large folios this now suddenly extents to the variable sized and potentially much larger folio. Add code to limit the extension to the page boundaries of the start and end of the write, which matches the historic expecation and the code comments. Fixes: b73fe2dd6cd5 ("nfs: add support for large folios") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f0a124b45291..acf2d942d78f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1351,8 +1351,12 @@ int nfs_update_folio(struct file *file, struct folio *folio, goto out; if (nfs_can_extend_write(file, folio, pagelen)) { - count = max(count + offset, pagelen); - offset = 0; + unsigned int end = count + offset; + + offset = round_down(offset, PAGE_SIZE); + if (end < pagelen) + end = min(round_up(end, PAGE_SIZE), pagelen); + count = end - offset; } status = nfs_writepage_setup(ctx, folio, offset, count); -- cgit v1.2.3 From fada32ed6dbc748f447c8d050a961b75d946055a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2024 09:17:02 +0200 Subject: nfs: pass explicit offset/count to trace events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nfs_folio_length is unsafe to use without having the folio locked and a check for a NULL ->f_mapping that protects against truncations and can lead to kernel crashes. E.g. when running xfstests generic/065 with all nfs trace points enabled. Follow the model of the XFS trace points and pass in an explŃ–cit offset and length. This has the additional benefit that these values can be more accurate as some of the users touch partial folio ranges. Fixes: eb5654b3b89d ("NFS: Enable tracing of nfs_invalidate_folio() and nfs_launder_folio()") Reported-by: Chuck Lever Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 5 +++-- fs/nfs/nfstrace.h | 36 ++++++++++++++++++++---------------- fs/nfs/read.c | 8 +++++--- fs/nfs/write.c | 10 +++++----- 4 files changed, 33 insertions(+), 26 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0e2f87120cb8..9aa2ab218c0a 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -436,7 +436,7 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset, /* Cancel any unstarted writes on this page */ nfs_wb_folio_cancel(inode, folio); folio_wait_private_2(folio); /* [DEPRECATED] */ - trace_nfs_invalidate_folio(inode, folio); + trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length); } /* @@ -504,7 +504,8 @@ static int nfs_launder_folio(struct folio *folio) folio_wait_private_2(folio); /* [DEPRECATED] */ ret = nfs_wb_folio(inode, folio); - trace_nfs_launder_folio_done(inode, folio, ret); + trace_nfs_launder_folio_done(inode, folio_pos(folio), + folio_size(folio), ret); return ret; } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 1e710654af11..352fdaed4075 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -939,10 +939,11 @@ TRACE_EVENT(nfs_sillyrename_unlink, DECLARE_EVENT_CLASS(nfs_folio_event, TP_PROTO( const struct inode *inode, - struct folio *folio + loff_t offset, + size_t count ), - TP_ARGS(inode, folio), + TP_ARGS(inode, offset, count), TP_STRUCT__entry( __field(dev_t, dev) @@ -950,7 +951,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event, __field(u64, fileid) __field(u64, version) __field(loff_t, offset) - __field(u32, count) + __field(size_t, count) ), TP_fast_assign( @@ -960,13 +961,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = folio_file_pos(folio); - __entry->count = nfs_folio_length(folio); + __entry->offset = offset, + __entry->count = count; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " - "offset=%lld count=%u", + "offset=%lld count=%zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, @@ -978,18 +979,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event, DEFINE_EVENT(nfs_folio_event, name, \ TP_PROTO( \ const struct inode *inode, \ - struct folio *folio \ + loff_t offset, \ + size_t count \ ), \ - TP_ARGS(inode, folio)) + TP_ARGS(inode, offset, count)) DECLARE_EVENT_CLASS(nfs_folio_event_done, TP_PROTO( const struct inode *inode, - struct folio *folio, + loff_t offset, + size_t count, int ret ), - TP_ARGS(inode, folio, ret), + TP_ARGS(inode, offset, count, ret), TP_STRUCT__entry( __field(dev_t, dev) @@ -998,7 +1001,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, __field(u64, fileid) __field(u64, version) __field(loff_t, offset) - __field(u32, count) + __field(size_t, count) ), TP_fast_assign( @@ -1008,14 +1011,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = folio_file_pos(folio); - __entry->count = nfs_folio_length(folio); + __entry->offset = offset, + __entry->count = count, __entry->ret = ret; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " - "offset=%lld count=%u ret=%d", + "offset=%lld count=%zu ret=%d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, @@ -1027,10 +1030,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, DEFINE_EVENT(nfs_folio_event_done, name, \ TP_PROTO( \ const struct inode *inode, \ - struct folio *folio, \ + loff_t offset, \ + size_t count, \ int ret \ ), \ - TP_ARGS(inode, folio, ret)) + TP_ARGS(inode, offset, count, ret)) DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 036ede4875ca..6fee86a2eb42 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -333,13 +333,15 @@ out: int nfs_read_folio(struct file *file, struct folio *folio) { struct inode *inode = file_inode(file); + loff_t pos = folio_pos(folio); + size_t len = folio_size(folio); struct nfs_pageio_descriptor pgio; struct nfs_open_context *ctx; int ret; - trace_nfs_aop_readpage(inode, folio); + trace_nfs_aop_readpage(inode, pos, len); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - task_io_account_read(folio_size(folio)); + task_io_account_read(len); /* * Try to flush any pending writes to the file.. @@ -383,7 +385,7 @@ int nfs_read_folio(struct file *file, struct folio *folio) out_put: put_nfs_open_context(ctx); out: - trace_nfs_aop_readpage_done(inode, folio, ret); + trace_nfs_aop_readpage_done(inode, pos, len, ret); return ret; out_unlock: folio_unlock(folio); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index acf2d942d78f..b297833a4514 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2063,17 +2063,17 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) */ int nfs_wb_folio(struct inode *inode, struct folio *folio) { - loff_t range_start = folio_file_pos(folio); - loff_t range_end = range_start + (loff_t)folio_size(folio) - 1; + loff_t range_start = folio_pos(folio); + size_t len = folio_size(folio); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, .range_start = range_start, - .range_end = range_end, + .range_end = range_start + len - 1, }; int ret; - trace_nfs_writeback_folio(inode, folio); + trace_nfs_writeback_folio(inode, range_start, len); for (;;) { folio_wait_writeback(folio); @@ -2091,7 +2091,7 @@ int nfs_wb_folio(struct inode *inode, struct folio *folio) goto out_error; } out_error: - trace_nfs_writeback_folio_done(inode, folio, ret); + trace_nfs_writeback_folio_done(inode, range_start, len, ret); return ret; } -- cgit v1.2.3 From a308996ed7c02d1cd6504ce73436ef3f1c1c75c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2024 09:17:03 +0200 Subject: nfs: split nfs_read_folio nfs_read_folio is a bit hard to follow because it mixes highlevel logic with the actual data read. Split the latter into a helper and update the comments to be more accurate. Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/read.c | 69 +++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 28 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6fee86a2eb42..77fa5458b4bd 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -325,18 +325,52 @@ out: } /* - * Read a page over NFS. - * We read the page synchronously in the following case: - * - The error flag is set for this page. This happens only when a - * previous async read operation failed. + * Actually read a folio over the wire. + */ +static int nfs_do_read_folio(struct file *file, struct folio *folio) +{ + struct inode *inode = file_inode(file); + struct nfs_pageio_descriptor pgio; + struct nfs_open_context *ctx; + int ret; + + ctx = get_nfs_open_context(nfs_file_open_context(file)); + + xchg(&ctx->error, 0); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); + + ret = nfs_read_add_folio(&pgio, ctx, folio); + if (ret) + goto out_put; + + nfs_pageio_complete_read(&pgio); + nfs_update_delegated_atime(inode); + if (pgio.pg_error < 0) { + ret = pgio.pg_error; + goto out_put; + } + + ret = folio_wait_locked_killable(folio); + if (!folio_test_uptodate(folio) && !ret) + ret = xchg(&ctx->error, 0); + +out_put: + put_nfs_open_context(ctx); + return ret; +} + +/* + * Synchronously read a folio. + * + * This is not heavily used as most users to try an asynchronous + * large read through ->readahead first. */ int nfs_read_folio(struct file *file, struct folio *folio) { struct inode *inode = file_inode(file); loff_t pos = folio_pos(folio); size_t len = folio_size(folio); - struct nfs_pageio_descriptor pgio; - struct nfs_open_context *ctx; int ret; trace_nfs_aop_readpage(inode, pos, len); @@ -361,29 +395,8 @@ int nfs_read_folio(struct file *file, struct folio *folio) goto out_unlock; ret = nfs_netfs_read_folio(file, folio); - if (!ret) - goto out; - - ctx = get_nfs_open_context(nfs_file_open_context(file)); - - xchg(&ctx->error, 0); - nfs_pageio_init_read(&pgio, inode, false, - &nfs_async_read_completion_ops); - - ret = nfs_read_add_folio(&pgio, ctx, folio); if (ret) - goto out_put; - - nfs_pageio_complete_read(&pgio); - nfs_update_delegated_atime(inode); - ret = pgio.pg_error < 0 ? pgio.pg_error : 0; - if (!ret) { - ret = folio_wait_locked_killable(folio); - if (!folio_test_uptodate(folio) && !ret) - ret = xchg(&ctx->error, 0); - } -out_put: - put_nfs_open_context(ctx); + ret = nfs_do_read_folio(file, folio); out: trace_nfs_aop_readpage_done(inode, pos, len, ret); return ret; -- cgit v1.2.3