summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c3
-rw-r--r--fs/afs/file.c3
-rw-r--r--fs/bcachefs/acl.c11
-rw-r--r--fs/bcachefs/acl.h2
-rw-r--r--fs/bcachefs/alloc_foreground.c2
-rw-r--r--fs/bcachefs/bcachefs_format.h3
-rw-r--r--fs/bcachefs/buckets.c12
-rw-r--r--fs/bcachefs/buckets.h2
-rw-r--r--fs/bcachefs/disk_accounting.c65
-rw-r--r--fs/bcachefs/disk_accounting_format.h15
-rw-r--r--fs/bcachefs/fs.c8
-rw-r--r--fs/bcachefs/replicas.c1
-rw-r--r--fs/bcachefs/sb-downgrade.c27
-rw-r--r--fs/bcachefs/sb-errors_format.h6
-rw-r--r--fs/binfmt_flat.c4
-rw-r--r--fs/btrfs/delayed-ref.c67
-rw-r--r--fs/btrfs/delayed-ref.h2
-rw-r--r--fs/btrfs/extent-tree.c51
-rw-r--r--fs/btrfs/extent_io.c14
-rw-r--r--fs/btrfs/extent_map.c22
-rw-r--r--fs/btrfs/send.c52
-rw-r--r--fs/btrfs/super.c10
-rw-r--r--fs/btrfs/tree-checker.c5
-rw-r--r--fs/ceph/addr.c28
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/exec.c8
-rw-r--r--fs/file.c30
-rw-r--r--fs/inode.c39
-rw-r--r--fs/libfs.c35
-rw-r--r--fs/locks.c2
-rw-r--r--fs/netfs/Kconfig2
-rw-r--r--fs/netfs/buffered_read.c123
-rw-r--r--fs/netfs/buffered_write.c2
-rw-r--r--fs/netfs/fscache_cookie.c4
-rw-r--r--fs/netfs/io.c161
-rw-r--r--fs/netfs/objects.c10
-rw-r--r--fs/netfs/write_issue.c4
-rw-r--r--fs/nfs/fscache.c5
-rw-r--r--fs/nfs/fscache.h2
-rw-r--r--fs/nfsd/nfsctl.c3
-rw-r--r--fs/smb/client/cifs_debug.c2
-rw-r--r--fs/smb/client/cifsglob.h12
-rw-r--r--fs/smb/client/file.c3
-rw-r--r--fs/smb/client/misc.c11
-rw-r--r--fs/smb/client/smb2pdu.c3
-rw-r--r--fs/smb/client/smbdirect.c8
-rw-r--r--fs/smb/client/transport.c2
-rw-r--r--fs/smb/server/mgmt/share_config.c15
-rw-r--r--fs/smb/server/mgmt/share_config.h4
-rw-r--r--fs/smb/server/mgmt/tree_connect.c9
-rw-r--r--fs/smb/server/mgmt/tree_connect.h4
-rw-r--r--fs/smb/server/smb2pdu.c9
-rw-r--r--fs/smb/server/smb_common.c9
-rw-r--r--fs/smb/server/smb_common.h2
-rw-r--r--fs/squashfs/inode.c7
55 files changed, 765 insertions, 182 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index a97ceb105cd8..24fdc74caeba 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -75,7 +75,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
/* if we just extended the file size, any portion not in
* cache won't be on server and is zeroes */
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ if (subreq->rreq->origin != NETFS_DIO_READ)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
netfs_subreq_terminated(subreq, err ?: total, false);
}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index c3f0c45ae9a9..ec1be0091fdb 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -242,7 +242,8 @@ static void afs_fetch_data_notify(struct afs_operation *op)
req->error = error;
if (subreq) {
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ if (subreq->rreq->origin != NETFS_DIO_READ)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
req->subreq = NULL;
} else if (req->done) {
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index a7b425d3c8a0..331a17f3f113 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans,
return xattr;
}
-struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, int type)
+struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
{
- struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+ struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
- struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct posix_acl *acl = NULL;
+
+ if (rcu)
+ return ERR_PTR(-ECHILD);
+
+ struct btree_trans *trans = bch2_trans_get(c);
retry:
bch2_trans_begin(trans);
diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h
index 27e7eec0f278..fe730a6bf0c1 100644
--- a/fs/bcachefs/acl.h
+++ b/fs/bcachefs/acl.h
@@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t);
#ifdef CONFIG_BCACHEFS_POSIX_ACL
-struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int);
+struct posix_acl *bch2_get_acl(struct inode *, int, bool);
int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *,
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 02de5ad2be2c..8563c2d26847 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1740,7 +1740,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);
- bch2_dev_usage_to_text(out, &stats);
+ bch2_dev_usage_to_text(out, ca, &stats);
prt_newline(out);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index ad893684db52..b25f86356728 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -675,7 +675,8 @@ struct bch_sb_field_ext {
x(btree_subvolume_children, BCH_VERSION(1, 6)) \
x(mi_btree_bitmap, BCH_VERSION(1, 7)) \
x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \
- x(disk_accounting_v2, BCH_VERSION(1, 9))
+ x(disk_accounting_v2, BCH_VERSION(1, 9)) \
+ x(disk_accounting_v3, BCH_VERSION(1, 10))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 2650a0d24663..9f7004e941ce 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -71,17 +71,21 @@ bch2_fs_usage_read_short(struct bch_fs *c)
return ret;
}
-void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
+void bch2_dev_usage_to_text(struct printbuf *out,
+ struct bch_dev *ca,
+ struct bch_dev_usage *usage)
{
prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n");
for (unsigned i = 0; i < BCH_DATA_NR; i++) {
bch2_prt_data_type(out, i);
prt_printf(out, "\t%llu\r%llu\r%llu\r\n",
- usage->d[i].buckets,
- usage->d[i].sectors,
- usage->d[i].fragmented);
+ usage->d[i].buckets,
+ usage->d[i].sectors,
+ usage->d[i].fragmented);
}
+
+ prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets);
}
static int bch2_check_fix_ptr(struct btree_trans *trans,
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 2d35eeb24a2d..edbdffd508fc 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -212,7 +212,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
return ret;
}
-void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *);
+void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *);
static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
{
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index dcdd59249c23..046ac92b6639 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -114,11 +114,74 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
}
+static inline bool is_zero(char *start, char *end)
+{
+ BUG_ON(start > end);
+
+ for (; start < end; start++)
+ if (*start)
+ return false;
+ return true;
+}
+
+#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member))
+
int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags,
struct printbuf *err)
{
- return 0;
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, k.k->p);
+ void *end = &acc_k + 1;
+ int ret = 0;
+
+ switch (acc_k.type) {
+ case BCH_DISK_ACCOUNTING_nr_inodes:
+ end = field_end(acc_k, nr_inodes);
+ break;
+ case BCH_DISK_ACCOUNTING_persistent_reserved:
+ end = field_end(acc_k, persistent_reserved);
+ break;
+ case BCH_DISK_ACCOUNTING_replicas:
+ bkey_fsck_err_on(!acc_k.replicas.nr_devs,
+ c, err, accounting_key_replicas_nr_devs_0,
+ "accounting key replicas entry with nr_devs=0");
+
+ bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs ||
+ (acc_k.replicas.nr_required > 1 &&
+ acc_k.replicas.nr_required == acc_k.replicas.nr_devs),
+ c, err, accounting_key_replicas_nr_required_bad,
+ "accounting key replicas entry with bad nr_required");
+
+ for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++)
+ bkey_fsck_err_on(acc_k.replicas.devs[i] > acc_k.replicas.devs[i + 1],
+ c, err, accounting_key_replicas_devs_unsorted,
+ "accounting key replicas entry with unsorted devs");
+
+ end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas);
+ break;
+ case BCH_DISK_ACCOUNTING_dev_data_type:
+ end = field_end(acc_k, dev_data_type);
+ break;
+ case BCH_DISK_ACCOUNTING_compression:
+ end = field_end(acc_k, compression);
+ break;
+ case BCH_DISK_ACCOUNTING_snapshot:
+ end = field_end(acc_k, snapshot);
+ break;
+ case BCH_DISK_ACCOUNTING_btree:
+ end = field_end(acc_k, btree);
+ break;
+ case BCH_DISK_ACCOUNTING_rebalance_work:
+ end = field_end(acc_k, rebalance_work);
+ break;
+ }
+
+ bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)),
+ c, err, accounting_key_junk_at_end,
+ "junk at end of accounting key");
+fsck_err:
+ return ret;
}
void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k)
diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h
index cba417060b33..a93cf26ff4a9 100644
--- a/fs/bcachefs/disk_accounting_format.h
+++ b/fs/bcachefs/disk_accounting_format.h
@@ -124,20 +124,19 @@ struct bch_dev_data_type {
__u8 data_type;
};
-struct bch_dev_stripe_buckets {
- __u8 dev;
-};
-
struct bch_acct_compression {
__u8 type;
};
struct bch_acct_snapshot {
__u32 id;
-};
+} __packed;
struct bch_acct_btree {
__u32 id;
+} __packed;
+
+struct bch_acct_rebalance_work {
};
struct disk_accounting_pos {
@@ -149,12 +148,12 @@ struct disk_accounting_pos {
struct bch_persistent_reserved persistent_reserved;
struct bch_replicas_entry_v1 replicas;
struct bch_dev_data_type dev_data_type;
- struct bch_dev_stripe_buckets dev_stripe_buckets;
struct bch_acct_compression compression;
struct bch_acct_snapshot snapshot;
struct bch_acct_btree btree;
- };
- };
+ struct bch_acct_rebalance_work rebalance_work;
+ } __packed;
+ } __packed;
struct bpos _pad;
};
};
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3a5f49affa0a..15fc41e63b6c 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1199,7 +1199,7 @@ static const struct inode_operations bch_file_inode_operations = {
.fiemap = bch2_fiemap,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
- .get_acl = bch2_get_acl,
+ .get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@@ -1219,7 +1219,7 @@ static const struct inode_operations bch_dir_inode_operations = {
.tmpfile = bch2_tmpfile,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
- .get_acl = bch2_get_acl,
+ .get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@@ -1241,7 +1241,7 @@ static const struct inode_operations bch_symlink_inode_operations = {
.setattr = bch2_setattr,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
- .get_acl = bch2_get_acl,
+ .get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@@ -1251,7 +1251,7 @@ static const struct inode_operations bch_special_inode_operations = {
.setattr = bch2_setattr,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
- .get_acl = bch2_get_acl,
+ .get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 10c96cb2047a..1223b710755d 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -24,7 +24,6 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv)
static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
- BUG_ON(e->data_type >= BCH_DATA_NR);
BUG_ON(!e->nr_devs);
BUG_ON(e->nr_required > 1 &&
e->nr_required >= e->nr_devs);
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index dfbbd33c8731..6c4469f53313 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -61,7 +61,18 @@
BCH_FSCK_ERR_dev_usage_buckets_wrong, \
BCH_FSCK_ERR_dev_usage_sectors_wrong, \
BCH_FSCK_ERR_dev_usage_fragmented_wrong, \
- BCH_FSCK_ERR_accounting_mismatch)
+ BCH_FSCK_ERR_accounting_mismatch) \
+ x(disk_accounting_v3, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_bkey_version_in_future, \
+ BCH_FSCK_ERR_dev_usage_buckets_wrong, \
+ BCH_FSCK_ERR_dev_usage_sectors_wrong, \
+ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \
+ BCH_FSCK_ERR_accounting_mismatch, \
+ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \
+ BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \
+ BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \
+ BCH_FSCK_ERR_accounting_key_junk_at_end)
#define DOWNGRADE_TABLE() \
x(bucket_stripe_sectors, \
@@ -79,6 +90,20 @@
BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \
BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \
BCH_FSCK_ERR_fs_usage_replicas_wrong, \
+ BCH_FSCK_ERR_bkey_version_in_future) \
+ x(disk_accounting_v3, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_dev_usage_buckets_wrong, \
+ BCH_FSCK_ERR_dev_usage_sectors_wrong, \
+ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \
+ BCH_FSCK_ERR_fs_usage_hidden_wrong, \
+ BCH_FSCK_ERR_fs_usage_btree_wrong, \
+ BCH_FSCK_ERR_fs_usage_data_wrong, \
+ BCH_FSCK_ERR_fs_usage_cached_wrong, \
+ BCH_FSCK_ERR_fs_usage_reserved_wrong, \
+ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \
+ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \
+ BCH_FSCK_ERR_fs_usage_replicas_wrong, \
BCH_FSCK_ERR_bkey_version_in_future)
struct upgrade_downgrade_entry {
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index d1b2f2aa397a..d3a498617303 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -287,7 +287,11 @@ enum bch_fsck_flags {
x(accounting_replicas_not_marked, 273, 0) \
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0) \
- x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX)
+ x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
+ x(accounting_key_junk_at_end, 277, 0) \
+ x(accounting_key_replicas_nr_devs_0, 278, 0) \
+ x(accounting_key_replicas_nr_required_bad, 279, 0) \
+ x(accounting_key_replicas_devs_unsorted, 280, 0) \
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index c26545d71d39..cd6d5bbb4b9d 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -72,8 +72,10 @@
#ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET
#define DATA_START_OFFSET_WORDS (0)
+#define MAX_SHARED_LIBS_UPDATE (0)
#else
#define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS)
+#define MAX_SHARED_LIBS_UPDATE (MAX_SHARED_LIBS)
#endif
struct lib_info {
@@ -880,7 +882,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
return res;
/* Update data segment pointers for all libraries */
- for (i = 0; i < MAX_SHARED_LIBS; i++) {
+ for (i = 0; i < MAX_SHARED_LIBS_UPDATE; i++) {
if (!libinfo.lib_list[i].loaded)
continue;
for (j = 0; j < MAX_SHARED_LIBS; j++) {
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 2ac9296edccb..06a9e0542d70 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -1134,6 +1134,73 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt
return find_ref_head(delayed_refs, bytenr, false);
}
+static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent)
+{
+ int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY;
+
+ if (type < entry->type)
+ return -1;
+ if (type > entry->type)
+ return 1;
+
+ if (type == BTRFS_TREE_BLOCK_REF_KEY) {
+ if (root < entry->ref_root)
+ return -1;
+ if (root > entry->ref_root)
+ return 1;
+ } else {
+ if (parent < entry->parent)
+ return -1;
+ if (parent > entry->parent)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Check to see if a given root/parent reference is attached to the head. This
+ * only checks for BTRFS_ADD_DELAYED_REF references that match, as that
+ * indicates the reference exists for the given root or parent. This is for
+ * tree blocks only.
+ *
+ * @head: the head of the bytenr we're searching.
+ * @root: the root objectid of the reference if it is a normal reference.
+ * @parent: the parent if this is a shared backref.
+ */
+bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
+ u64 root, u64 parent)
+{
+ struct rb_node *node;
+ bool found = false;
+
+ lockdep_assert_held(&head->mutex);
+
+ spin_lock(&head->lock);
+ node = head->ref_tree.rb_root.rb_node;
+ while (node) {
+ struct btrfs_delayed_ref_node *entry;
+ int ret;
+
+ entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
+ ret = find_comp(entry, root, parent);
+ if (ret < 0) {
+ node = node->rb_left;
+ } else if (ret > 0) {
+ node = node->rb_right;
+ } else {
+ /*
+ * We only want to count ADD actions, as drops mean the
+ * ref doesn't exist.
+ */
+ if (entry->action == BTRFS_ADD_DELAYED_REF)
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&head->lock);
+ return found;
+}
+
void __cold btrfs_delayed_ref_exit(void)
{
kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index ef15e998be03..05f634eb472d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -389,6 +389,8 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
enum btrfs_reserve_flush_enum flush);
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
+bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
+ u64 root, u64 parent);
static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
{
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ff9f0d41987e..feec49e6f9c8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5472,23 +5472,62 @@ static int check_ref_exists(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 parent,
int level)
{
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_head *head;
struct btrfs_path *path;
struct btrfs_extent_inline_ref *iref;
int ret;
+ bool exists = false;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
-
+again:
ret = lookup_extent_backref(trans, path, &iref, bytenr,
root->fs_info->nodesize, parent,
btrfs_root_id(root), level, 0);
+ if (ret != -ENOENT) {
+ /*
+ * If we get 0 then we found our reference, return 1, else
+ * return the error if it's not -ENOENT;
+ */
+ btrfs_free_path(path);
+ return (ret < 0 ) ? ret : 1;
+ }
+
+ /*
+ * We could have a delayed ref with this reference, so look it up while
+ * we're holding the path open to make sure we don't race with the
+ * delayed ref running.
+ */
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
+ if (!head)
+ goto out;
+ if (!mutex_trylock(&head->mutex)) {
+ /*
+ * We're contended, means that the delayed ref is running, get a
+ * reference and wait for the ref head to be complete and then
+ * try again.
+ */
+ refcount_inc(&head->refs);
+ spin_unlock(&delayed_refs->lock);
+
+ btrfs_release_path(path);
+
+ mutex_lock(&head->mutex);
+ mutex_unlock(&head->mutex);
+ btrfs_put_delayed_ref_head(head);
+ goto again;
+ }
+
+ exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent);
+ mutex_unlock(&head->mutex);
+out:
+ spin_unlock(&delayed_refs->lock);
btrfs_free_path(path);
- if (ret == -ENOENT)
- return 0;
- if (ret < 0)
- return ret;
- return 1;
+ return exists ? 1 : 0;
}
/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aa7f8148cd0d..c73cd4f89015 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1496,6 +1496,13 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
free_extent_map(em);
em = NULL;
+ /*
+ * Although the PageDirty bit might be cleared before entering
+ * this function, subpage dirty bit is not cleared.
+ * So clear subpage dirty bit here so next time we won't submit
+ * page for range already written to disk.
+ */
+ btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize);
btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(inode->root->fs_info,
@@ -1503,13 +1510,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
page->index, cur, end);
}
- /*
- * Although the PageDirty bit is cleared before entering this
- * function, subpage dirty bit is not cleared.
- * So clear subpage dirty bit here so next time we won't submit
- * page for range already written to disk.
- */
- btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize);
submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
cur - page_offset(page));
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 23b65dc73c00..10ac5f657e38 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1147,8 +1147,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
return 0;
/*
- * We want to be fast because we can be called from any path trying to
- * allocate memory, so if the lock is busy we don't want to spend time
+ * We want to be fast so if the lock is busy we don't want to spend time
* waiting for it - either some task is about to do IO for the inode or
* we may have another task shrinking extent maps, here in this code, so
* skip this inode.
@@ -1191,9 +1190,7 @@ next:
/*
* Stop if we need to reschedule or there's contention on the
* lock. This is to avoid slowing other tasks trying to take the
- * lock and because the shrinker might be called during a memory
- * allocation path and we want to avoid taking a very long time
- * and slowing down all sorts of tasks.
+ * lock.
*/
if (need_resched() || rwlock_needbreak(&tree->lock))
break;
@@ -1222,12 +1219,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx
if (ctx->scanned >= ctx->nr_to_scan)
break;
- /*
- * We may be called from memory allocation paths, so we don't
- * want to take too much time and slowdown tasks.
- */
- if (need_resched())
- break;
+ cond_resched();
inode = btrfs_find_first_inode(root, min_ino);
}
@@ -1285,14 +1277,12 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
ctx.last_ino);
}
- /*
- * We may be called from memory allocation paths, so we don't want to
- * take too much time and slowdown tasks, so stop if we need reschedule.
- */
- while (ctx.scanned < ctx.nr_to_scan && !need_resched()) {
+ while (ctx.scanned < ctx.nr_to_scan) {
struct btrfs_root *root;
unsigned long count;
+ cond_resched();
+
spin_lock(&fs_info->fs_roots_radix_lock);
count = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)&root,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 4ca711a773ef..7fc692fc76e1 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6157,25 +6157,51 @@ static int send_write_or_clone(struct send_ctx *sctx,
u64 offset = key->offset;
u64 end;
u64 bs = sctx->send_root->fs_info->sectorsize;
+ struct btrfs_file_extent_item *ei;
+ u64 disk_byte;
+ u64 data_offset;
+ u64 num_bytes;
+ struct btrfs_inode_info info = { 0 };
end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
if (offset >= end)
return 0;
- if (clone_root && IS_ALIGNED(end, bs)) {
- struct btrfs_file_extent_item *ei;
- u64 disk_byte;
- u64 data_offset;
+ num_bytes = end - offset;
- ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_file_extent_item);
- disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
- data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
- ret = clone_range(sctx, path, clone_root, disk_byte,
- data_offset, offset, end - offset);
- } else {
- ret = send_extent_data(sctx, path, offset, end - offset);
- }
+ if (!clone_root)
+ goto write_data;
+
+ if (IS_ALIGNED(end, bs))
+ goto clone_data;
+
+ /*
+ * If the extent end is not aligned, we can clone if the extent ends at
+ * the i_size of the inode and the clone range ends at the i_size of the
+ * source inode, otherwise the clone operation fails with -EINVAL.
+ */
+ if (end != sctx->cur_inode_size)
+ goto write_data;
+
+ ret = get_inode_info(clone_root->root, clone_root->ino, &info);
+ if (ret < 0)
+ return ret;
+
+ if (clone_root->offset + num_bytes == info.size)
+ goto clone_data;
+
+write_data:
+ ret = send_extent_data(sctx, path, offset, num_bytes);
+ sctx->cur_inode_next_write_offset = end;
+ return ret;
+
+clone_data:
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
+ data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
+ ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset,
+ num_bytes);
sctx->cur_inode_next_write_offset = end;
return ret;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 83478deada3b..11044e9e2cb1 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -28,6 +28,7 @@
#include <linux/btrfs.h>
#include <linux/security.h>
#include <linux/fs_parser.h>
+#include <linux/swap.h>
#include "messages.h"
#include "delayed-inode.h"
#include "ctree.h"
@@ -2409,6 +2410,15 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont
const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan);
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ /*
+ * We may be called from any task trying to allocate memory and we don't
+ * want to slow it down with scanning and dropping extent maps. It would
+ * also cause heavy lock contention if many tasks concurrently enter
+ * here. Therefore only allow kswapd tasks to scan and drop extent maps.
+ */
+ if (!current_is_kswapd())
+ return 0;
+
return btrfs_free_extent_maps(fs_info, nr_to_scan);
}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a825fa598e3c..6f1e2f2215d9 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -569,9 +569,10 @@ static int check_dir_item(struct extent_buffer *leaf,
/* dir type check */
dir_type = btrfs_dir_ftype(leaf, di);
- if (unlikely(dir_type >= BTRFS_FT_MAX)) {
+ if (unlikely(dir_type <= BTRFS_FT_UNKNOWN ||
+ dir_type >= BTRFS_FT_MAX)) {
dir_item_err(leaf, slot,
- "invalid dir item type, have %u expect [0, %u)",
+ "invalid dir item type, have %u expect (0, %u)",
dir_type, BTRFS_FT_MAX);
return -EUCLEAN;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c16bc5250ef..c4744a02db75 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -246,7 +246,8 @@ static void finish_netfs_read(struct ceph_osd_request *req)
if (err >= 0) {
if (sparse && err > 0)
err = ceph_sparse_ext_map_end(op);
- if (err < subreq->len)
+ if (err < subreq->len &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (IS_ENCRYPTED(inode) && err > 0) {
err = ceph_fscrypt_decrypt_extents(inode,
@@ -282,7 +283,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
size_t len;
int mode;
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ if (rreq->origin != NETFS_DIO_READ)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
if (subreq->start >= inode->i_size)
@@ -424,6 +426,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
struct ceph_netfs_request_data *priv;
int ret = 0;
+ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+ __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+
if (rreq->origin != NETFS_READAHEAD)
return 0;
@@ -498,6 +503,11 @@ const struct netfs_request_ops ceph_netfs_ops = {
};
#ifdef CONFIG_CEPH_FSCACHE
+static void ceph_set_page_fscache(struct page *page)
+{
+ folio_start_private_2(page_folio(page)); /* [DEPRECATED] */
+}
+
static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
{
struct inode *inode = priv;
@@ -515,6 +525,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
ceph_fscache_write_terminated, inode, true, caching);
}
#else
+static inline void ceph_set_page_fscache(struct page *page)
+{
+}
+
static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
{
}
@@ -706,6 +720,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
len = wlen;
set_page_writeback(page);
+ if (caching)
+ ceph_set_page_fscache(page);
ceph_fscache_write_to_cache(inode, page_off, len, caching);
if (IS_ENCRYPTED(inode)) {
@@ -789,6 +805,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
return AOP_WRITEPAGE_ACTIVATE;
}
+ folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
+
err = writepage_nounlock(page, wbc);
if (err == -ERESTARTSYS) {
/* direct memory reclaimer was killed by SIGKILL. return 0
@@ -1062,7 +1080,8 @@ get_more_pages:
unlock_page(page);
break;
}
- if (PageWriteback(page)) {
+ if (PageWriteback(page) ||
+ PagePrivate2(page) /* [DEPRECATED] */) {
if (wbc->sync_mode == WB_SYNC_NONE) {
doutc(cl, "%p under writeback\n", page);
unlock_page(page);
@@ -1070,6 +1089,7 @@ get_more_pages:
}
doutc(cl, "waiting on writeback %p\n", page);
wait_on_page_writeback(page);
+ folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
}
if (!clear_page_dirty_for_io(page)) {
@@ -1254,6 +1274,8 @@ new_request:
}
set_page_writeback(page);
+ if (caching)
+ ceph_set_page_fscache(page);
len += thp_size(page);
}
ceph_fscache_write_to_cache(inode, offset, len, caching);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8f8de8f33abb..71cd70514efa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
/* Set parameters for the netfs library */
netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
- /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
- __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);
spin_lock_init(&ci->i_ceph_lock);
diff --git a/fs/exec.c b/fs/exec.c
index a126e3d1cacb..50e76cc633c4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1692,6 +1692,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
unsigned int mode;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
+ int err;
if (!mnt_may_suid(file->f_path.mnt))
return;
@@ -1708,12 +1709,17 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
/* Be careful if suid/sgid is set */
inode_lock(inode);
- /* reload atomically mode/uid/gid now that lock held */
+ /* Atomically reload and check mode/uid/gid now that lock held. */
mode = inode->i_mode;
vfsuid = i_uid_into_vfsuid(idmap, inode);
vfsgid = i_gid_into_vfsgid(idmap, inode);
+ err = inode_permission(idmap, inode, MAY_EXEC);
inode_unlock(inode);
+ /* Did the exec bit vanish out from under us? Give up. */
+ if (err)
+ return;
+
/* We ignore suid/sgid if there are no mappings for them in the ns */
if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) ||
!vfsgid_has_mapping(bprm->cred->user_ns, vfsgid))
diff --git a/fs/file.c b/fs/file.c
index a11e59b5d602..655338effe9c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
#define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr))
#define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long))
+#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds
/*
* Copy 'count' fd bits from the old table to the new table and clear the extra
* space if any. This does not copy the file pointers. Called with the files
* spinlock held for write.
*/
-static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
- unsigned int count)
+static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
+ unsigned int copy_words)
{
- unsigned int cpy, set;
-
- cpy = count / BITS_PER_BYTE;
- set = (nfdt->max_fds - count) / BITS_PER_BYTE;
- memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
- memset((char *)nfdt->open_fds + cpy, 0, set);
- memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
- memset((char *)nfdt->close_on_exec + cpy, 0, set);
-
- cpy = BITBIT_SIZE(count);
- set = BITBIT_SIZE(nfdt->max_fds) - cpy;
- memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
- memset((char *)nfdt->full_fds_bits + cpy, 0, set);
+ unsigned int nwords = fdt_words(nfdt);
+
+ bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds,
+ copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
+ bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec,
+ copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
+ bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits,
+ copy_words, nwords);
}
/*
@@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
memcpy(nfdt->fd, ofdt->fd, cpy);
memset((char *)nfdt->fd + cpy, 0, set);
- copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
+ copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt));
}
/*
@@ -379,7 +375,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
open_files = sane_fdtable_size(old_fdt, max_fds);
}
- copy_fd_bitmaps(new_fdt, old_fdt, open_files);
+ copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
old_fds = old_fdt->fd;
new_fds = new_fdt->fd;
diff --git a/fs/inode.c b/fs/inode.c
index 86670941884b..10c4619faeef 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -488,6 +488,39 @@ static void inode_lru_list_del(struct inode *inode)
this_cpu_dec(nr_unused);
}
+static void inode_pin_lru_isolating(struct inode *inode)
+{
+ lockdep_assert_held(&inode->i_lock);
+ WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
+ inode->i_state |= I_LRU_ISOLATING;
+}
+
+static void inode_unpin_lru_isolating(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
+ inode->i_state &= ~I_LRU_ISOLATING;
+ smp_mb();
+ wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+ spin_unlock(&inode->i_lock);
+}
+
+static void inode_wait_for_lru_isolating(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & I_LRU_ISOLATING) {
+ DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
+ wait_queue_head_t *wqh;
+
+ wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
+ spin_unlock(&inode->i_lock);
+ __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+ spin_lock(&inode->i_lock);
+ WARN_ON(inode->i_state & I_LRU_ISOLATING);
+ }
+ spin_unlock(&inode->i_lock);
+}
+
/**
* inode_sb_list_add - add inode to the superblock list of inodes
* @inode: inode to add
@@ -657,6 +690,8 @@ static void evict(struct inode *inode)
inode_sb_list_del(inode);
+ inode_wait_for_lru_isolating(inode);
+
/*
* Wait for flusher thread to be done with the inode so that filesystem
* does not start destroying it while writeback is still running. Since
@@ -855,7 +890,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
* be under pressure before the cache inside the highmem zone.
*/
if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
- __iget(inode);
+ inode_pin_lru_isolating(inode);
spin_unlock(&inode->i_lock);
spin_unlock(lru_lock);
if (remove_inode_buffers(inode)) {
@@ -867,7 +902,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
__count_vm_events(PGINODESTEAL, reap);
mm_account_reclaimed_pages(reap);
}
- iput(inode);
+ inode_unpin_lru_isolating(inode);
spin_lock(lru_lock);
return LRU_RETRY;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 8aa34870449f..02602d00939e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -450,6 +450,14 @@ void simple_offset_destroy(struct offset_ctx *octx)
mtree_destroy(&octx->mt);
}
+static int offset_dir_open(struct inode *inode, struct file *file)
+{
+ struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
+
+ file->private_data = (void *)ctx->next_offset;
+ return 0;
+}
+
/**
* offset_dir_llseek - Advance the read position of a directory descriptor
* @file: an open directory whose position is to be updated
@@ -463,6 +471,9 @@ void simple_offset_destroy(struct offset_ctx *octx)
*/
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
{
+ struct inode *inode = file->f_inode;
+ struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
+
switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
@@ -476,7 +487,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
}
/* In this case, ->private_data is protected by f_pos_lock */
- file->private_data = NULL;
+ if (!offset)
+ file->private_data = (void *)ctx->next_offset;
return vfs_setpos(file, offset, LONG_MAX);
}
@@ -507,7 +519,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
}
-static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index)
{
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
struct dentry *dentry;
@@ -515,17 +527,21 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
while (true) {
dentry = offset_find_next(octx, ctx->pos);
if (!dentry)
- return ERR_PTR(-ENOENT);
+ return;
+
+ if (dentry2offset(dentry) >= last_index) {
+ dput(dentry);
+ return;
+ }
if (!offset_dir_emit(ctx, dentry)) {
dput(dentry);
- break;
+ return;
}
ctx->pos = dentry2offset(dentry) + 1;
dput(dentry);
}
- return NULL;
}
/**
@@ -552,22 +568,19 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
static int offset_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dir = file->f_path.dentry;
+ long last_index = (long)file->private_data;
lockdep_assert_held(&d_inode(dir)->i_rwsem);
if (!dir_emit_dots(file, ctx))
return 0;
- /* In this case, ->private_data is protected by f_pos_lock */
- if (ctx->pos == DIR_OFFSET_MIN)
- file->private_data = NULL;
- else if (file->private_data == ERR_PTR(-ENOENT))
- return 0;
- file->private_data = offset_iterate_dir(d_inode(dir), ctx);
+ offset_iterate_dir(d_inode(dir), ctx, last_index);
return 0;
}
const struct file_operations simple_offset_dir_operations = {
+ .open = offset_dir_open,
.llseek = offset_dir_llseek,
.iterate_shared = offset_readdir,
.read = generic_read_dir,
diff --git a/fs/locks.c b/fs/locks.c
index 9afb16e0683f..e45cad40f8b6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2984,7 +2984,7 @@ static int __init filelock_init(void)
filelock_cache = kmem_cache_create("file_lock_cache",
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
- filelease_cache = kmem_cache_create("file_lock_cache",
+ filelease_cache = kmem_cache_create("file_lease_cache",
sizeof(struct file_lease), 0, SLAB_PANIC, NULL);
for_each_possible_cpu(i) {
diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig
index 1b78e8b65ebc..7701c037c328 100644
--- a/fs/netfs/Kconfig
+++ b/fs/netfs/Kconfig
@@ -24,7 +24,7 @@ config NETFS_STATS
config NETFS_DEBUG
bool "Enable dynamic debugging netfslib and FS-Cache"
- depends on NETFS
+ depends on NETFS_SUPPORT
help
This permits debugging to be dynamically enabled in the local caching
management module. If this is set, the debugging output may be
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a688d4c75d99..27c750d39476 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -10,6 +10,97 @@
#include "internal.h"
/*
+ * [DEPRECATED] Unlock the folios in a read operation for when the filesystem
+ * is using PG_private_2 and direct writing to the cache from here rather than
+ * marking the page for writeback.
+ *
+ * Note that we don't touch folio->private in this code.
+ */
+static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq,
+ size_t *account)
+{
+ struct netfs_io_subrequest *subreq;
+ struct folio *folio;
+ pgoff_t start_page = rreq->start / PAGE_SIZE;
+ pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+ bool subreq_failed = false;
+
+ XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+
+ /* Walk through the pagecache and the I/O request lists simultaneously.
+ * We may have a mixture of cached and uncached sections and we only
+ * really want to write out the uncached sections. This is slightly
+ * complicated by the possibility that we might have huge pages with a
+ * mixture inside.
+ */
+ subreq = list_first_entry(&rreq->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ subreq_failed = (subreq->error < 0);
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2);
+
+ rcu_read_lock();
+ xas_for_each(&xas, folio, last_page) {
+ loff_t pg_end;
+ bool pg_failed = false;
+ bool folio_started = false;
+
+ if (xas_retry(&xas, folio))
+ continue;
+
+ pg_end = folio_pos(folio) + folio_size(folio) - 1;
+
+ for (;;) {
+ loff_t sreq_end;
+
+ if (!subreq) {
+ pg_failed = true;
+ break;
+ }
+
+ if (!folio_started &&
+ test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) &&
+ fscache_operation_valid(&rreq->cache_resources)) {
+ trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+ folio_start_private_2(folio);
+ folio_started = true;
+ }
+
+ pg_failed |= subreq_failed;
+ sreq_end = subreq->start + subreq->len - 1;
+ if (pg_end < sreq_end)
+ break;
+
+ *account += subreq->transferred;
+ if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+ subreq = list_next_entry(subreq, rreq_link);
+ subreq_failed = (subreq->error < 0);
+ } else {
+ subreq = NULL;
+ subreq_failed = false;
+ }
+
+ if (pg_end == sreq_end)
+ break;
+ }
+
+ if (!pg_failed) {
+ flush_dcache_folio(folio);
+ folio_mark_uptodate(folio);
+ }
+
+ if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
+ if (folio->index == rreq->no_unlock_folio &&
+ test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
+ _debug("no unlock");
+ else
+ folio_unlock(folio);
+ }
+ }
+ rcu_read_unlock();
+}
+
+/*
* Unlock the folios in a read operation. We need to set PG_writeback on any
* folios we're going to write back before we unlock them.
*
@@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
}
}
+ /* Handle deprecated PG_private_2 case. */
+ if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
+ netfs_rreq_unlock_folios_pgpriv2(rreq, &account);
+ goto out;
+ }
+
/* Walk through the pagecache and the I/O request lists simultaneously.
* We may have a mixture of cached and uncached sections and we only
* really want to write out the uncached sections. This is slightly
@@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
loff_t pg_end;
bool pg_failed = false;
bool wback_to_cache = false;
- bool folio_started = false;
if (xas_retry(&xas, folio))
continue;
@@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
pg_failed = true;
break;
}
- if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
- if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE,
- &subreq->flags)) {
- trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
- folio_start_private_2(folio);
- folio_started = true;
- }
- } else {
- wback_to_cache |=
- test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
- }
+
+ wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
pg_failed |= subreq_failed;
sreq_end = subreq->start + subreq->len - 1;
if (pg_end < sreq_end)
@@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
}
rcu_read_unlock();
+out:
task_io_account_read(account);
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@@ -395,7 +483,7 @@ zero_out:
}
/**
- * netfs_write_begin - Helper to prepare for writing
+ * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
* @ctx: The netfs context
* @file: The file to read from
* @mapping: The mapping to read from
@@ -426,6 +514,9 @@ zero_out:
* inode before calling this.
*
* This is usable whether or not caching is enabled.
+ *
+ * Note that this should be considered deprecated and netfs_perform_write()
+ * used instead.
*/
int netfs_write_begin(struct netfs_inode *ctx,
struct file *file, struct address_space *mapping,
@@ -466,7 +557,7 @@ retry:
if (!netfs_is_cache_enabled(ctx) &&
netfs_skip_folio_read(folio, pos, len, false)) {
netfs_stat(&netfs_n_rh_write_zskip);
- goto have_folio;
+ goto have_folio_no_wait;
}
rreq = netfs_alloc_request(mapping, file,
@@ -507,6 +598,10 @@ retry:
netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
have_folio:
+ ret = folio_wait_private_2_killable(folio);
+ if (ret < 0)
+ goto error;
+have_folio_no_wait:
*_folio = folio;
_leave(" = 0");
return 0;
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 4726c315453c..ca53c5d1622e 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -184,7 +184,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos, from, to;
- size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
+ size_t max_chunk = mapping_max_folio_size(mapping);
bool maybe_trouble = false;
if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c
index bce2492186d0..d4d4b3a8b106 100644
--- a/fs/netfs/fscache_cookie.c
+++ b/fs/netfs/fscache_cookie.c
@@ -741,6 +741,10 @@ again_locked:
spin_lock(&cookie->lock);
}
if (test_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) {
+ if (atomic_read(&cookie->n_accesses) != 0)
+ /* still being accessed: postpone it */
+ break;
+
__fscache_set_cookie_state(cookie,
FSCACHE_COOKIE_STATE_LRU_DISCARDING);
wake = true;
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b98368..5367caf3fa28 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -99,6 +99,146 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
}
/*
+ * [DEPRECATED] Deal with the completion of writing the data to the cache. We
+ * have to clear the PG_fscache bits on the folios involved and release the
+ * caller's ref.
+ *
+ * May be called in softirq mode and we inherit a ref from the caller.
+ */
+static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
+ bool was_async)
+{
+ struct netfs_io_subrequest *subreq;
+ struct folio *folio;
+ pgoff_t unlocked = 0;
+ bool have_unlocked = false;
+
+ rcu_read_lock();
+
+ list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+ XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
+
+ xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
+ if (xas_retry(&xas, folio))
+ continue;
+
+ /* We might have multiple writes from the same huge
+ * folio, but we mustn't unlock a folio more than once.
+ */
+ if (have_unlocked && folio->index <= unlocked)
+ continue;
+ unlocked = folio_next_index(folio) - 1;
+ trace_netfs_folio(folio, netfs_folio_trace_end_copy);
+ folio_end_private_2(folio);
+ have_unlocked = true;
+ }
+ }
+
+ rcu_read_unlock();
+ netfs_rreq_completed(rreq, was_async);
+}
+
+static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
+ bool was_async) /* [DEPRECATED] */
+{
+ struct netfs_io_subrequest *subreq = priv;
+ struct netfs_io_request *rreq = subreq->rreq;
+
+ if (IS_ERR_VALUE(transferred_or_error)) {
+ netfs_stat(&netfs_n_rh_write_failed);
+ trace_netfs_failure(rreq, subreq, transferred_or_error,
+ netfs_fail_copy_to_cache);
+ } else {
+ netfs_stat(&netfs_n_rh_write_done);
+ }
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);
+
+ /* If we decrement nr_copy_ops to 0, the ref belongs to us. */
+ if (atomic_dec_and_test(&rreq->nr_copy_ops))
+ netfs_rreq_unmark_after_write(rreq, was_async);
+
+ netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+}
+
+/*
+ * [DEPRECATED] Perform any outstanding writes to the cache. We inherit a ref
+ * from the caller.
+ */
+static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
+{
+ struct netfs_cache_resources *cres = &rreq->cache_resources;
+ struct netfs_io_subrequest *subreq, *next, *p;
+ struct iov_iter iter;
+ int ret;
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_copy);
+
+ /* We don't want terminating writes trying to wake us up whilst we're
+ * still going through the list.
+ */
+ atomic_inc(&rreq->nr_copy_ops);
+
+ list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
+ if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+ list_del_init(&subreq->rreq_link);
+ netfs_put_subrequest(subreq, false,
+ netfs_sreq_trace_put_no_copy);
+ }
+ }
+
+ list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+ /* Amalgamate adjacent writes */
+ while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+ next = list_next_entry(subreq, rreq_link);
+ if (next->start != subreq->start + subreq->len)
+ break;
+ subreq->len += next->len;
+ list_del_init(&next->rreq_link);
+ netfs_put_subrequest(next, false,
+ netfs_sreq_trace_put_merged);
+ }
+
+ ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
+ subreq->len, rreq->i_size, true);
+ if (ret < 0) {
+ trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
+ continue;
+ }
+
+ iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages,
+ subreq->start, subreq->len);
+
+ atomic_inc(&rreq->nr_copy_ops);
+ netfs_stat(&netfs_n_rh_write);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_write);
+ cres->ops->write(cres, subreq->start, &iter,
+ netfs_rreq_copy_terminated, subreq);
+ }
+
+ /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
+ if (atomic_dec_and_test(&rreq->nr_copy_ops))
+ netfs_rreq_unmark_after_write(rreq, false);
+}
+
+static void netfs_rreq_write_to_cache_work(struct work_struct *work) /* [DEPRECATED] */
+{
+ struct netfs_io_request *rreq =
+ container_of(work, struct netfs_io_request, work);
+
+ netfs_rreq_do_write_to_cache(rreq);
+}
+
+static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) /* [DEPRECATED] */
+{
+ rreq->work.func = netfs_rreq_write_to_cache_work;
+ if (!queue_work(system_unbound_wq, &rreq->work))
+ BUG();
+}
+
+/*
* Handle a short read.
*/
static void netfs_rreq_short_read(struct netfs_io_request *rreq,
@@ -275,6 +415,10 @@ again:
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
+ if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags) &&
+ test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags))
+ return netfs_rreq_write_to_cache(rreq);
+
netfs_rreq_completed(rreq, was_async);
}
@@ -386,7 +530,8 @@ incomplete:
if (transferred_or_error == 0) {
if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
- subreq->error = -ENODATA;
+ if (rreq->origin != NETFS_DIO_READ)
+ subreq->error = -ENODATA;
goto failed;
}
} else {
@@ -457,9 +602,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
}
if (subreq->len > ictx->zero_point - subreq->start)
subreq->len = ictx->zero_point - subreq->start;
+
+ /* We limit buffered reads to the EOF, but let the
+ * server deal with larger-than-EOF DIO/unbuffered
+ * reads.
+ */
+ if (subreq->len > rreq->i_size - subreq->start)
+ subreq->len = rreq->i_size - subreq->start;
}
- if (subreq->len > rreq->i_size - subreq->start)
- subreq->len = rreq->i_size - subreq->start;
if (rreq->rsize && subreq->len > rreq->rsize)
subreq->len = rreq->rsize;
@@ -595,11 +745,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
do {
_debug("submit %llx + %llx >= %llx",
rreq->start, rreq->submitted, rreq->i_size);
- if (rreq->origin == NETFS_DIO_READ &&
- rreq->start + rreq->submitted >= rreq->i_size)
- break;
if (!netfs_rreq_submit_slice(rreq, &io_iter))
break;
+ if (test_bit(NETFS_SREQ_NO_PROGRESS, &rreq->flags))
+ break;
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
break;
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index f4a642727479..0294df70c3ff 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -24,10 +24,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
struct netfs_io_request *rreq;
mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool;
struct kmem_cache *cache = mempool->pool_data;
- bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
- origin == NETFS_DIO_READ ||
- origin == NETFS_DIO_WRITE);
- bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
int ret;
for (;;) {
@@ -56,12 +52,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
refcount_set(&rreq->ref, 1);
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- if (cached) {
- __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
- if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
- /* Filesystem uses deprecated PG_private_2 marking. */
- __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
- }
if (file && file->f_flags & O_NONBLOCK)
__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
if (rreq->netfs_ops->init_request) {
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 9258d30cffe3..3f7e37e50c7d 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -94,6 +94,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
{
struct netfs_io_request *wreq;
struct netfs_inode *ictx;
+ bool is_buffered = (origin == NETFS_WRITEBACK ||
+ origin == NETFS_WRITETHROUGH);
wreq = netfs_alloc_request(mapping, file, start, 0, origin);
if (IS_ERR(wreq))
@@ -102,7 +104,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
_enter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
- if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+ if (is_buffered && netfs_is_cache_enabled(ictx))
fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
wreq->contiguity = wreq->start;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 7202ce84d0eb..7a558dea75c4 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi
{
rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
+ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+ __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
return 0;
}
@@ -361,7 +363,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
return;
sreq = netfs->sreq;
- if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+ sreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
if (hdr->error)
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index fbed0027996f..e8adae1bc260 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
{
netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
- /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
- __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags);
}
extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 9e0ea6fc2aa3..34eb2c2cbcde 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -2069,8 +2069,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
continue;
}
- ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa,
- SVC_SOCK_ANONYMOUS,
+ ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0,
get_current_cred());
/* always save the latest error */
if (ret < 0)
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index c71ae5c04306..4a20e92474b2 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -1072,7 +1072,7 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file)
static void
cifs_security_flags_handle_must_flags(unsigned int *flags)
{
- unsigned int signflags = *flags & CIFSSEC_MUST_SIGN;
+ unsigned int signflags = *flags & (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL);
if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
*flags = CIFSSEC_MUST_KRB5;
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index f6d1f075987f..5c9b3e6cd95f 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -345,7 +345,7 @@ struct smb_version_operations {
/* connect to a server share */
int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *,
struct cifs_tcon *, const struct nls_table *);
- /* close tree connecion */
+ /* close tree connection */
int (*tree_disconnect)(const unsigned int, struct cifs_tcon *);
/* get DFS referrals */
int (*get_dfs_refer)(const unsigned int, struct cifs_ses *,
@@ -816,7 +816,7 @@ struct TCP_Server_Info {
* Protected by @refpath_lock and @srv_lock. The @refpath_lock is
* mostly used for not requiring a copy of @leaf_fullpath when getting
* cached or new DFS referrals (which might also sleep during I/O).
- * While @srv_lock is held for making string and NULL comparions against
+ * While @srv_lock is held for making string and NULL comparisons against
* both fields as in mount(2) and cache refresh.
*
* format: \\HOST\SHARE[\OPTIONAL PATH]
@@ -1881,7 +1881,7 @@ static inline bool is_replayable_error(int error)
#define CIFSSEC_MAY_SIGN 0x00001
#define CIFSSEC_MAY_NTLMV2 0x00004
#define CIFSSEC_MAY_KRB5 0x00008
-#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
+#define CIFSSEC_MAY_SEAL 0x00040
#define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */
#define CIFSSEC_MUST_SIGN 0x01001
@@ -1891,11 +1891,11 @@ require use of the stronger protocol */
#define CIFSSEC_MUST_NTLMV2 0x04004
#define CIFSSEC_MUST_KRB5 0x08008
#ifdef CONFIG_CIFS_UPCALL
-#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */
+#define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */
#else
-#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */
+#define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */
#endif /* UPCALL */
-#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
+#define CIFSSEC_MUST_SEAL 0x40040
#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL)
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index b2405dd4d4d4..3f3842e7b44a 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -217,7 +217,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq)
goto out;
}
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ if (subreq->rreq->origin != NETFS_DIO_READ)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
rc = rdata->server->ops->async_readv(rdata);
out:
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index b28ff62f1f15..c6f11e6f9eb9 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -352,7 +352,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
* on simple responses (wct, bcc both zero)
* in particular have seen this on
* ulogoffX and FindClose. This leaves
- * one byte of bcc potentially unitialized
+ * one byte of bcc potentially uninitialized
*/
/* zero rest of bcc */
tmp[sizeof(struct smb_hdr)+1] = 0;
@@ -1234,6 +1234,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid,
const char *full_path,
bool *islink)
{
+ struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_ses *ses = tcon->ses;
size_t len;
char *path;
@@ -1250,12 +1251,12 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid,
!is_tcon_dfs(tcon))
return 0;
- spin_lock(&tcon->tc_lock);
- if (!tcon->origin_fullpath) {
- spin_unlock(&tcon->tc_lock);
+ spin_lock(&server->srv_lock);
+ if (!server->leaf_fullpath) {
+ spin_unlock(&server->srv_lock);
return 0;
}
- spin_unlock(&tcon->tc_lock);
+ spin_unlock(&server->srv_lock);
/*
* Slow path - tcon is DFS and @full_path has prefix path, so attempt
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 9a06b5594669..83facb54276a 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -82,6 +82,9 @@ int smb3_encryption_required(const struct cifs_tcon *tcon)
if (tcon->seal &&
(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
return 1;
+ if (((global_secflags & CIFSSEC_MUST_SEAL) == CIFSSEC_MUST_SEAL) &&
+ (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
+ return 1;
return 0;
}
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index d74e829de51c..7bcc379014ca 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -406,7 +406,7 @@ static void smbd_post_send_credits(struct work_struct *work)
else
response = get_empty_queue_buffer(info);
if (!response) {
- /* now switch to emtpy packet queue */
+ /* now switch to empty packet queue */
if (use_receive_queue) {
use_receive_queue = 0;
continue;
@@ -618,7 +618,7 @@ out:
/*
* Test if FRWR (Fast Registration Work Requests) is supported on the device
- * This implementation requries FRWR on RDMA read/write
+ * This implementation requires FRWR on RDMA read/write
* return value: true if it is supported
*/
static bool frwr_is_supported(struct ib_device_attr *attrs)
@@ -2177,7 +2177,7 @@ cleanup_entries:
* MR available in the list. It may access the list while the
* smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock
* as they never modify the same places. However, there may be several CPUs
- * issueing I/O trying to get MR at the same time, mr_list_lock is used to
+ * issuing I/O trying to get MR at the same time, mr_list_lock is used to
* protect this situation.
*/
static struct smbd_mr *get_mr(struct smbd_connection *info)
@@ -2311,7 +2311,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info,
/*
* There is no need for waiting for complemtion on ib_post_send
* on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
- * on the next ib_post_send when we actaully send I/O to remote peer
+ * on the next ib_post_send when we actually send I/O to remote peer
*/
rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
if (!rc)
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index adfe0d058701..6e68aaf5bd20 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -1289,7 +1289,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
out:
/*
* This will dequeue all mids. After this it is important that the
- * demultiplex_thread will not process any of these mids any futher.
+ * demultiplex_thread will not process any of these mids any further.
* This is prevented above by using a noop callback that will not
* wake this thread except for the very last PDU.
*/
diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c
index e0a6b758094f..d8d03070ae44 100644
--- a/fs/smb/server/mgmt/share_config.c
+++ b/fs/smb/server/mgmt/share_config.c
@@ -15,6 +15,7 @@
#include "share_config.h"
#include "user_config.h"
#include "user_session.h"
+#include "../connection.h"
#include "../transport_ipc.h"
#include "../misc.h"
@@ -120,12 +121,13 @@ static int parse_veto_list(struct ksmbd_share_config *share,
return 0;
}
-static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
+static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work,
const char *name)
{
struct ksmbd_share_config_response *resp;
struct ksmbd_share_config *share = NULL;
struct ksmbd_share_config *lookup;
+ struct unicode_map *um = work->conn->um;
int ret;
resp = ksmbd_ipc_share_config_request(name);
@@ -181,7 +183,14 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
KSMBD_SHARE_CONFIG_VETO_LIST(resp),
resp->veto_list_sz);
if (!ret && share->path) {
+ if (__ksmbd_override_fsids(work, share)) {
+ kill_share(share);
+ share = NULL;
+ goto out;
+ }
+
ret = kern_path(share->path, 0, &share->vfs_path);
+ ksmbd_revert_fsids(work);
if (ret) {
ksmbd_debug(SMB, "failed to access '%s'\n",
share->path);
@@ -214,7 +223,7 @@ out:
return share;
}
-struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um,
+struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work,
const char *name)
{
struct ksmbd_share_config *share;
@@ -227,7 +236,7 @@ struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um,
if (share)
return share;
- return share_config_request(um, name);
+ return share_config_request(work, name);
}
bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
diff --git a/fs/smb/server/mgmt/share_config.h b/fs/smb/server/mgmt/share_config.h
index 5f591751b923..d4ac2dd4de20 100644
--- a/fs/smb/server/mgmt/share_config.h
+++ b/fs/smb/server/mgmt/share_config.h
@@ -11,6 +11,8 @@
#include <linux/path.h>
#include <linux/unicode.h>
+struct ksmbd_work;
+
struct ksmbd_share_config {
char *name;
char *path;
@@ -68,7 +70,7 @@ static inline void ksmbd_share_config_put(struct ksmbd_share_config *share)
__ksmbd_share_config_put(share);
}
-struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um,
+struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work,
const char *name);
bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
const char *filename);
diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c
index d2c81a8a11dd..94a52a75014a 100644
--- a/fs/smb/server/mgmt/tree_connect.c
+++ b/fs/smb/server/mgmt/tree_connect.c
@@ -16,17 +16,18 @@
#include "user_session.h"
struct ksmbd_tree_conn_status
-ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
- const char *share_name)
+ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name)
{
struct ksmbd_tree_conn_status status = {-ENOENT, NULL};
struct ksmbd_tree_connect_response *resp = NULL;
struct ksmbd_share_config *sc;
struct ksmbd_tree_connect *tree_conn = NULL;
struct sockaddr *peer_addr;
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
int ret;
- sc = ksmbd_share_config_get(conn->um, share_name);
+ sc = ksmbd_share_config_get(work, share_name);
if (!sc)
return status;
@@ -61,7 +62,7 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
struct ksmbd_share_config *new_sc;
ksmbd_share_config_del(sc);
- new_sc = ksmbd_share_config_get(conn->um, share_name);
+ new_sc = ksmbd_share_config_get(work, share_name);
if (!new_sc) {
pr_err("Failed to update stale share config\n");
status.ret = -ESTALE;
diff --git a/fs/smb/server/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h
index 6377a70b811c..a42cdd051041 100644
--- a/fs/smb/server/mgmt/tree_connect.h
+++ b/fs/smb/server/mgmt/tree_connect.h
@@ -13,6 +13,7 @@
struct ksmbd_share_config;
struct ksmbd_user;
struct ksmbd_conn;
+struct ksmbd_work;
enum {
TREE_NEW = 0,
@@ -50,8 +51,7 @@ static inline int test_tree_conn_flag(struct ksmbd_tree_connect *tree_conn,
struct ksmbd_session;
struct ksmbd_tree_conn_status
-ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
- const char *share_name);
+ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name);
void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon);
int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 37a39ab4ee65..2df1354288e6 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1955,7 +1955,7 @@ int smb2_tree_connect(struct ksmbd_work *work)
ksmbd_debug(SMB, "tree connect request for tree %s treename %s\n",
name, treename);
- status = ksmbd_tree_conn_connect(conn, sess, name);
+ status = ksmbd_tree_conn_connect(work, name);
if (status.ret == KSMBD_TREE_CONN_STATUS_OK)
rsp->hdr.Id.SyncId.TreeId = cpu_to_le32(status.tree_conn->id);
else
@@ -5596,6 +5596,11 @@ int smb2_query_info(struct ksmbd_work *work)
ksmbd_debug(SMB, "GOT query info request\n");
+ if (ksmbd_override_fsids(work)) {
+ rc = -ENOMEM;
+ goto err_out;
+ }
+
switch (req->InfoType) {
case SMB2_O_INFO_FILE:
ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
@@ -5614,6 +5619,7 @@ int smb2_query_info(struct ksmbd_work *work)
req->InfoType);
rc = -EOPNOTSUPP;
}
+ ksmbd_revert_fsids(work);
if (!rc) {
rsp->StructureSize = cpu_to_le16(9);
@@ -5623,6 +5629,7 @@ int smb2_query_info(struct ksmbd_work *work)
le32_to_cpu(rsp->OutputBufferLength));
}
+err_out:
if (rc < 0) {
if (rc == -EACCES)
rsp->hdr.Status = STATUS_ACCESS_DENIED;
diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c
index 474dadf6b7b8..13818ecb6e1b 100644
--- a/fs/smb/server/smb_common.c
+++ b/fs/smb/server/smb_common.c
@@ -732,10 +732,10 @@ bool is_asterisk(char *p)
return p && p[0] == '*';
}
-int ksmbd_override_fsids(struct ksmbd_work *work)
+int __ksmbd_override_fsids(struct ksmbd_work *work,
+ struct ksmbd_share_config *share)
{
struct ksmbd_session *sess = work->sess;
- struct ksmbd_share_config *share = work->tcon->share_conf;
struct cred *cred;
struct group_info *gi;
unsigned int uid;
@@ -775,6 +775,11 @@ int ksmbd_override_fsids(struct ksmbd_work *work)
return 0;
}
+int ksmbd_override_fsids(struct ksmbd_work *work)
+{
+ return __ksmbd_override_fsids(work, work->tcon->share_conf);
+}
+
void ksmbd_revert_fsids(struct ksmbd_work *work)
{
const struct cred *cred;
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index f1092519c0c2..4a3148b0167f 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -447,6 +447,8 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn,
int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command);
int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp);
+int __ksmbd_override_fsids(struct ksmbd_work *work,
+ struct ksmbd_share_config *share);
int ksmbd_override_fsids(struct ksmbd_work *work);
void ksmbd_revert_fsids(struct ksmbd_work *work);
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 16bd693d0b3a..d5918eba27e3 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -279,8 +279,13 @@ int squashfs_read_inode(struct inode *inode, long long ino)
if (err < 0)
goto failed_read;
- set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
+ if (inode->i_size > PAGE_SIZE) {
+ ERROR("Corrupted symlink\n");
+ return -EINVAL;
+ }
+
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_op = &squashfs_symlink_inode_ops;
inode_nohighmem(inode);
inode->i_data.a_ops = &squashfs_symlink_aops;