diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/dir.c | 1 | ||||
-rw-r--r-- | fs/afs/file.c | 1 | ||||
-rw-r--r-- | fs/afs/fs_operation.c | 7 | ||||
-rw-r--r-- | fs/afs/inode.c | 1 | ||||
-rw-r--r-- | fs/afs/internal.h | 1 | ||||
-rw-r--r-- | fs/afs/mntpt.c | 1 | ||||
-rw-r--r-- | fs/afs/xattr.c | 31 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 23 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 33 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 37 | ||||
-rw-r--r-- | fs/btrfs/reada.c | 35 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 8 | ||||
-rw-r--r-- | fs/fuse/dev.c | 26 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 1 | ||||
-rw-r--r-- | fs/fuse/virtio_fs.c | 9 | ||||
-rw-r--r-- | fs/iomap/swapfile.c | 10 | ||||
-rw-r--r-- | fs/locks.c | 3 | ||||
-rw-r--r-- | fs/nfsd/Kconfig | 1 | ||||
-rw-r--r-- | fs/nfsd/filecache.c | 2 | ||||
-rw-r--r-- | fs/nfsd/nfs4callback.c | 1 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 2 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 55 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 14 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 90 | ||||
-rw-r--r-- | fs/xfs/xfs_symlink.c | 3 |
27 files changed, 230 insertions, 174 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 714fcca9af99..17548c1faf02 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -70,7 +70,6 @@ const struct inode_operations afs_dir_inode_operations = { .permission = afs_permission, .getattr = afs_getattr, .setattr = afs_setattr, - .listxattr = afs_listxattr, }; const struct address_space_operations afs_dir_aops = { diff --git a/fs/afs/file.c b/fs/afs/file.c index 85f5adf21aa0..960b64268623 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -43,7 +43,6 @@ const struct inode_operations afs_file_inode_operations = { .getattr = afs_getattr, .setattr = afs_setattr, .permission = afs_permission, - .listxattr = afs_listxattr, }; const struct address_space_operations afs_fs_aops = { diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 97cab12b0a6c..71c58723763d 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -181,10 +181,13 @@ void afs_wait_for_operation(struct afs_operation *op) if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) && op->ops->issue_yfs_rpc) op->ops->issue_yfs_rpc(op); - else + else if (op->ops->issue_afs_rpc) op->ops->issue_afs_rpc(op); + else + op->ac.error = -ENOTSUPP; - op->error = afs_wait_for_call_to_complete(op->call, &op->ac); + if (op->call) + op->error = afs_wait_for_call_to_complete(op->call, &op->ac); } switch (op->error) { diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1156b2df28d3..12be88716e4c 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -27,7 +27,6 @@ static const struct inode_operations afs_symlink_inode_operations = { .get_link = page_get_link, - .listxattr = afs_listxattr, }; static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b626e38e9ab5..1627b1872812 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1509,7 +1509,6 @@ extern int afs_launder_page(struct page *); * xattr.c */ extern const struct xattr_handler *afs_xattr_handlers[]; -extern ssize_t afs_listxattr(struct dentry *, char *, size_t); /* * yfsclient.c diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 052dab2f5c03..bbb2c210d139 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -32,7 +32,6 @@ const struct inode_operations afs_mntpt_inode_operations = { .lookup = afs_mntpt_lookup, .readlink = page_readlink, .getattr = afs_getattr, - .listxattr = afs_listxattr, }; const struct inode_operations afs_autocell_inode_operations = { diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index c629caae5002..7751b0b3f81d 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -11,29 +11,6 @@ #include <linux/xattr.h> #include "internal.h" -static const char afs_xattr_list[] = - "afs.acl\0" - "afs.cell\0" - "afs.fid\0" - "afs.volume\0" - "afs.yfs.acl\0" - "afs.yfs.acl_inherited\0" - "afs.yfs.acl_num_cleaned\0" - "afs.yfs.vol_acl"; - -/* - * Retrieve a list of the supported xattrs. - */ -ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - if (size == 0) - return sizeof(afs_xattr_list); - if (size < sizeof(afs_xattr_list)) - return -ERANGE; - memcpy(buffer, afs_xattr_list, sizeof(afs_xattr_list)); - return sizeof(afs_xattr_list); -} - /* * Deal with the result of a successful fetch ACL operation. */ @@ -231,6 +208,8 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, else ret = -ERANGE; } + } else if (ret == -ENOTSUPP) { + ret = -ENODATA; } error_yacl: @@ -256,6 +235,7 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler, { struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(inode); + int ret; if (flags == XATTR_CREATE || strcmp(name, "acl") != 0) @@ -270,7 +250,10 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler, return afs_put_operation(op); op->ops = &yfs_store_opaque_acl2_operation; - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + if (ret == -ENOTSUPP) + ret = -ENODATA; + return ret; } static const struct xattr_handler afs_xattr_yfs_handler = { diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d56730a67885..34b929bd5c1a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1365,7 +1365,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) "failed to read tree block %llu from get_old_root", logical); } else { + btrfs_tree_read_lock(old); eb = btrfs_clone_extent_buffer(old); + btrfs_tree_read_unlock(old); free_extent_buffer(old); } } else if (old_root) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 78ad31a59e59..36a3c973fda1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3323,6 +3323,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (last_ref && btrfs_header_generation(buf) == trans->transid) { struct btrfs_block_group *cache; + bool must_pin = false; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, buf->start); @@ -3340,7 +3341,27 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, goto out; } - if (btrfs_is_zoned(fs_info)) { + /* + * If this is a leaf and there are tree mod log users, we may + * have recorded mod log operations that point to this leaf. + * So we must make sure no one reuses this leaf's extent before + * mod log operations are applied to a node, otherwise after + * rewinding a node using the mod log operations we get an + * inconsistent btree, as the leaf's extent may now be used as + * a node or leaf for another different btree. + * We are safe from races here because at this point no other + * node or root points to this extent buffer, so if after this + * check a new tree mod log user joins, it will not be able to + * find a node pointing to this leaf and record operations that + * point to this leaf. + */ + if (btrfs_header_level(buf) == 0) { + read_lock(&fs_info->tree_mod_log_lock); + must_pin = !list_empty(&fs_info->tree_mod_seq_list); + read_unlock(&fs_info->tree_mod_log_lock); + } + + if (must_pin || btrfs_is_zoned(fs_info)) { btrfs_redirty_list_add(trans->transaction, buf); pin_down_extent(trans, cache, buf->start, buf->len, 1); btrfs_put_block_group(cache); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 191e358f1322..910769d5fcdb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2886,6 +2886,35 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len) } /* + * Find extent buffer for a givne bytenr. + * + * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking + * in endio context. + */ +static struct extent_buffer *find_extent_buffer_readpage( + struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr) +{ + struct extent_buffer *eb; + + /* + * For regular sectorsize, we can use page->private to grab extent + * buffer + */ + if (fs_info->sectorsize == PAGE_SIZE) { + ASSERT(PagePrivate(page) && page->private); + return (struct extent_buffer *)page->private; + } + + /* For subpage case, we need to lookup buffer radix tree */ + rcu_read_lock(); + eb = radix_tree_lookup(&fs_info->buffer_radix, + bytenr >> fs_info->sectorsize_bits); + rcu_read_unlock(); + ASSERT(eb); + return eb; +} + +/* * after a readpage IO is done, we need to: * clear the uptodate bits on error * set the uptodate bits if things worked @@ -2996,7 +3025,7 @@ static void end_bio_extent_readpage(struct bio *bio) } else { struct extent_buffer *eb; - eb = (struct extent_buffer *)page->private; + eb = find_extent_buffer_readpage(fs_info, page, start); set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = mirror; atomic_dec(&eb->io_pages); @@ -3020,7 +3049,7 @@ readpage_ok: */ if (page->index == end_index && i_size <= end) { u32 zero_start = max(offset_in_page(i_size), - offset_in_page(end)); + offset_in_page(start)); zero_user_segment(page, zero_start, offset_in_page(end) + 1); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 35bfa0533f23..7cdf65be3707 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9008,7 +9008,7 @@ int __init btrfs_init_cachep(void) btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap", PAGE_SIZE, PAGE_SIZE, - SLAB_RED_ZONE, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_free_space_bitmap_cachep) goto fail; @@ -9877,6 +9877,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( struct btrfs_path *path; u64 start = ins->objectid; u64 len = ins->offset; + int qgroup_released; int ret; memset(&stack_fi, 0, sizeof(stack_fi)); @@ -9889,16 +9890,16 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); /* Encryption and other encoding is reserved and all 0 */ - ret = btrfs_qgroup_release_data(inode, file_offset, len); - if (ret < 0) - return ERR_PTR(ret); + qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len); + if (qgroup_released < 0) + return ERR_PTR(qgroup_released); if (trans) { ret = insert_reserved_file_extent(trans, inode, file_offset, &stack_fi, - true, ret); + true, qgroup_released); if (ret) - return ERR_PTR(ret); + goto free_qgroup; return trans; } @@ -9909,21 +9910,35 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( extent_info.file_offset = file_offset; extent_info.extent_buf = (char *)&stack_fi; extent_info.is_new_extent = true; - extent_info.qgroup_reserved = ret; + extent_info.qgroup_reserved = qgroup_released; extent_info.insertions = 0; path = btrfs_alloc_path(); - if (!path) - return ERR_PTR(-ENOMEM); + if (!path) { + ret = -ENOMEM; + goto free_qgroup; + } ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset, file_offset + len - 1, &extent_info, &trans); btrfs_free_path(path); if (ret) - return ERR_PTR(ret); - + goto free_qgroup; return trans; + +free_qgroup: + /* + * We have released qgroup data range at the beginning of the function, + * and normally qgroup_released bytes will be freed when committing + * transaction. + * But if we error out early, we have to free what we have released + * or we leak qgroup data reservation. + */ + btrfs_qgroup_free_refroot(inode->root->fs_info, + inode->root->root_key.objectid, qgroup_released, + BTRFS_QGROUP_RSV_DATA); + return ERR_PTR(ret); } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 20fd4aa48a8c..06713a8fe26b 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -209,7 +209,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err) /* find extent */ spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, - eb->start >> PAGE_SHIFT); + eb->start >> fs_info->sectorsize_bits); if (re) re->refcnt++; spin_unlock(&fs_info->reada_lock); @@ -240,7 +240,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, zone = NULL; spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_SHIFT, 1); + logical >> fs_info->sectorsize_bits, 1); if (ret == 1 && logical >= zone->start && logical <= zone->end) { kref_get(&zone->refcnt); spin_unlock(&fs_info->reada_lock); @@ -283,13 +283,13 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, - (unsigned long)(zone->end >> PAGE_SHIFT), - zone); + (unsigned long)(zone->end >> fs_info->sectorsize_bits), + zone); if (ret == -EEXIST) { kfree(zone); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, - logical >> PAGE_SHIFT, 1); + logical >> fs_info->sectorsize_bits, 1); if (ret == 1 && logical >= zone->start && logical <= zone->end) kref_get(&zone->refcnt); else @@ -315,7 +315,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, u64 length; int real_stripes; int nzones = 0; - unsigned long index = logical >> PAGE_SHIFT; + unsigned long index = logical >> fs_info->sectorsize_bits; int dev_replace_is_ongoing; int have_zone = 0; @@ -497,7 +497,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, struct reada_extent *re) { int i; - unsigned long index = re->logical >> PAGE_SHIFT; + unsigned long index = re->logical >> fs_info->sectorsize_bits; spin_lock(&fs_info->reada_lock); if (--re->refcnt) { @@ -538,11 +538,12 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, static void reada_zone_release(struct kref *kref) { struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt); + struct btrfs_fs_info *fs_info = zone->device->fs_info; - lockdep_assert_held(&zone->device->fs_info->reada_lock); + lockdep_assert_held(&fs_info->reada_lock); radix_tree_delete(&zone->device->reada_zones, - zone->end >> PAGE_SHIFT); + zone->end >> fs_info->sectorsize_bits); kfree(zone); } @@ -593,7 +594,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical, static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock) { int i; - unsigned long index = zone->end >> PAGE_SHIFT; + unsigned long index = zone->end >> zone->device->fs_info->sectorsize_bits; for (i = 0; i < zone->ndevs; ++i) { struct reada_zone *peer; @@ -628,7 +629,7 @@ static int reada_pick_zone(struct btrfs_device *dev) (void **)&zone, index, 1); if (ret == 0) break; - index = (zone->end >> PAGE_SHIFT) + 1; + index = (zone->end >> dev->fs_info->sectorsize_bits) + 1; if (zone->locked) { if (zone->elems > top_locked_elems) { top_locked_elems = zone->elems; @@ -709,7 +710,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev) * plugging to speed things up */ ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_SHIFT, 1); + dev->reada_next >> fs_info->sectorsize_bits, 1); if (ret == 0 || re->logical > dev->reada_curr_zone->end) { ret = reada_pick_zone(dev); if (!ret) { @@ -718,7 +719,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev) } re = NULL; ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, - dev->reada_next >> PAGE_SHIFT, 1); + dev->reada_next >> fs_info->sectorsize_bits, 1); } if (ret == 0) { spin_unlock(&fs_info->reada_lock); @@ -885,7 +886,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) pr_cont(" curr off %llu", device->reada_next - zone->start); pr_cont("\n"); - index = (zone->end >> PAGE_SHIFT) + 1; + index = (zone->end >> fs_info->sectorsize_bits) + 1; } cnt = 0; index = 0; @@ -910,7 +911,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } pr_cont("\n"); - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; if (++cnt > 15) break; } @@ -926,7 +927,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) if (ret == 0) break; if (!re->scheduled) { - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; continue; } pr_debug("re: logical %llu size %u list empty %d scheduled %d", @@ -942,7 +943,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) } } pr_cont("\n"); - index = (re->logical >> PAGE_SHIFT) + 1; + index = (re->logical >> fs_info->sectorsize_bits) + 1; } spin_unlock(&fs_info->reada_lock); } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2f1acc9aea9e..92a368627791 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3169,10 +3169,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_lock(&log_root_tree->log_mutex); - index2 = log_root_tree->log_transid % 2; - list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); - root_log_ctx.log_transid = log_root_tree->log_transid; - if (btrfs_is_zoned(fs_info)) { if (!log_root_tree->node) { ret = btrfs_alloc_log_tree_node(trans, log_root_tree); @@ -3183,6 +3179,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } } + index2 = log_root_tree->log_transid % 2; + list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); + root_log_ctx.log_transid = log_root_tree->log_transid; + /* * Now we are safe to update the log_root_tree because we're under the * log_mutex, and we're a current writer so we're holding the commit diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c6636b4c4ccf..c0fee830a34e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2229,19 +2229,21 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) static long fuse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int err = -ENOTTY; + int res; + int oldfd; + struct fuse_dev *fud = NULL; - if (cmd == FUSE_DEV_IOC_CLONE) { - int oldfd; + if (_IOC_TYPE(cmd) != FUSE_DEV_IOC_MAGIC) + return -ENOTTY; - err = -EFAULT; - if (!get_user(oldfd, (__u32 __user *) arg)) { + switch (_IOC_NR(cmd)) { + case _IOC_NR(FUSE_DEV_IOC_CLONE): + res = -EFAULT; + if (!get_user(oldfd, (__u32 __user *)arg)) { struct file *old = fget(oldfd); - err = -EINVAL; + res = -EINVAL; if (old) { - struct fuse_dev *fud = NULL; - /* * Check against file->f_op because CUSE * uses the same ioctl handler. @@ -2252,14 +2254,18 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, if (fud) { mutex_lock(&fuse_mutex); - err = fuse_device_clone(fud->fc, file); + res = fuse_device_clone(fud->fc, file); mutex_unlock(&fuse_mutex); } fput(old); } } + break; + default: + res = -ENOTTY; + break; } - return err; + return res; } const struct file_operations fuse_dev_operations = { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 68cca8d4db6e..63d97a15ffde 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -863,6 +863,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) static inline void fuse_make_bad(struct inode *inode) { + remove_inode_hash(inode); set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 8868ac31a3c0..4ee6f734ba83 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1324,8 +1324,15 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) /* virtiofs allocates and installs its own fuse devices */ ctx->fudptr = NULL; - if (ctx->dax) + if (ctx->dax) { + if (!fs->dax_dev) { + err = -EINVAL; + pr_err("virtio-fs: dax can't be enabled as filesystem" + " device does not support it.\n"); + goto err_free_fuse_devs; + } ctx->dax_dev = fs->dax_dev; + } err = fuse_fill_super_common(sb, ctx); if (err < 0) goto err_free_fuse_devs; diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c index a648dbf6991e..a5e478de1417 100644 --- a/fs/iomap/swapfile.c +++ b/fs/iomap/swapfile.c @@ -170,6 +170,16 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, return ret; } + /* + * If this swapfile doesn't contain even a single page-aligned + * contiguous range of blocks, reject this useless swapfile to + * prevent confusion later on. + */ + if (isi.nr_pages == 0) { + pr_warn("swapon: Cannot find a single usable page in file.\n"); + return -EINVAL; + } + *pagespan = 1 + isi.highest_ppage - isi.lowest_ppage; sis->max = isi.nr_pages; sis->pages = isi.nr_pages - 1; diff --git a/fs/locks.c b/fs/locks.c index 99ca97e81b7a..6125d2de39b8 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1808,9 +1808,6 @@ check_conflicting_open(struct file *filp, const long arg, int flags) if (flags & FL_LAYOUT) return 0; - if (flags & FL_DELEG) - /* We leave these checks to the caller. */ - return 0; if (arg == F_RDLCK) return inode_is_open_for_write(inode) ? -EAGAIN : 0; diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 821e5913faee..d6cff5fbe705 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -73,6 +73,7 @@ config NFSD_V4 select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS + select CRYPTO select CRYPTO_MD5 select CRYPTO_SHA256 select GRACE_PERIOD diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 53fcbf79bdca..7629248fdd53 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -898,6 +898,8 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, continue; if (!nfsd_match_cred(nf->nf_cred, current_cred())) continue; + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) + continue; if (nfsd_file_get(nf) != NULL) return nf; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 052be5bf9ef5..7325592b456e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1189,6 +1189,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case -EIO: case -ETIMEDOUT: + case -EACCES: nfsd4_mark_cb_down(clp, task->tk_status); } break; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index acdb3cd806a1..dd9f38d072dd 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1302,7 +1302,7 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd_file *dst) { nfs42_ssc_close(src->nf_file); - /* 'src' is freed by nfsd4_do_async_copy */ + fput(src->nf_file); nfsd_file_put(dst); mntput(ss_mnt); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 423fd6683f3a..97447a64bad0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4940,31 +4940,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, return fl; } -static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, - struct nfs4_file *fp) -{ - struct nfs4_clnt_odstate *co; - struct file *f = fp->fi_deleg_file->nf_file; - struct inode *ino = locks_inode(f); - int writes = atomic_read(&ino->i_writecount); - - if (fp->fi_fds[O_WRONLY]) - writes--; - if (fp->fi_fds[O_RDWR]) - writes--; - if (writes > 0) - return -EAGAIN; - spin_lock(&fp->fi_lock); - list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) { - if (co->co_client != clp) { - spin_unlock(&fp->fi_lock); - return -EAGAIN; - } - } - spin_unlock(&fp->fi_lock); - return 0; -} - static struct nfs4_delegation * nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) @@ -4984,12 +4959,9 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, nf = find_readable_file(fp); if (!nf) { - /* - * We probably could attempt another open and get a read - * delegation, but for now, don't bother until the - * client actually sends us one. - */ - return ERR_PTR(-EAGAIN); + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + return ERR_PTR(-EBADF); } spin_lock(&state_lock); spin_lock(&fp->fi_lock); @@ -5019,19 +4991,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (!fl) goto out_clnt_odstate; - status = nfsd4_check_conflicting_opens(clp, fp); - if (status) { - locks_free_lock(fl); - goto out_clnt_odstate; - } status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL); if (fl) locks_free_lock(fl); if (status) goto out_clnt_odstate; - status = nfsd4_check_conflicting_opens(clp, fp); - if (status) - goto out_clnt_odstate; spin_lock(&state_lock); spin_lock(&fp->fi_lock); @@ -5113,6 +5077,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: goto out_no_deleg; @@ -5389,7 +5364,7 @@ nfs4_laundromat(struct nfsd_net *nn) idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && - cps->cpntf_time > cutoff) + cps->cpntf_time < cutoff) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 46a861d55e48..f93370bd7b1e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1007,9 +1007,10 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns), + fsgid_into_mnt(mnt_userns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); if (error) return error; @@ -1157,9 +1158,10 @@ xfs_create_tmpfile( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); + error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns), + fsgid_into_mnt(mnt_userns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); if (error) return error; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index ca310a125d1e..3498b97fb06d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -168,6 +168,12 @@ xfs_bulkstat_one( }; int error; + if (breq->mnt_userns != &init_user_ns) { + xfs_warn_ratelimited(breq->mp, + "bulkstat not supported inside of idmapped mounts."); + return -EINVAL; + } + ASSERT(breq->icount == 1); bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 52370d0a3f43..1c97b155a8ee 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -635,6 +635,47 @@ xfs_check_summary_counts( } /* + * Flush and reclaim dirty inodes in preparation for unmount. Inodes and + * internal inode structures can be sitting in the CIL and AIL at this point, + * so we need to unpin them, write them back and/or reclaim them before unmount + * can proceed. + * + * An inode cluster that has been freed can have its buffer still pinned in + * memory because the transaction is still sitting in a iclog. The stale inodes + * on that buffer will be pinned to the buffer until the transaction hits the + * disk and the callbacks run. Pushing the AIL will skip the stale inodes and + * may never see the pinned buffer, so nothing will push out the iclog and + * unpin the buffer. + * + * Hence we need to force the log to unpin everything first. However, log + * forces don't wait for the discards they issue to complete, so we have to + * explicitly wait for them to complete here as well. + * + * Then we can tell the world we are unmounting so that error handling knows + * that the filesystem is going away and we should error out anything that we + * have been retrying in the background. This will prevent never-ending + * retries in AIL pushing from hanging the unmount. + * + * Finally, we can push the AIL to clean all the remaining dirty objects, then + * reclaim the remaining inodes that are still in memory at this point in time. + */ +static void +xfs_unmount_flush_inodes( + struct xfs_mount *mp) +{ + xfs_log_force(mp, XFS_LOG_SYNC); + xfs_extent_busy_wait_all(mp); + flush_workqueue(xfs_discard_wq); + + mp->m_flags |= XFS_MOUNT_UNMOUNTING; + + xfs_ail_push_all_sync(mp->m_ail); + cancel_delayed_work_sync(&mp->m_reclaim_work); + xfs_reclaim_inodes(mp); + xfs_health_unmount(mp); +} + +/* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct * - if we're a 32-bit kernel, do a size check on the superblock @@ -1008,7 +1049,7 @@ xfs_mountfs( /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); /* - * Cancel all delayed reclaim work and reclaim the inodes directly. + * Flush all inode reclamation work and flush the log. * We have to do this /after/ rtunmount and qm_unmount because those * two will have scheduled delayed reclaim for the rt/quota inodes. * @@ -1018,11 +1059,8 @@ xfs_mountfs( * qm_unmount_quotas and therefore rely on qm_unmount to release the * quota inodes. */ - cancel_delayed_work_sync(&mp->m_reclaim_work); - xfs_reclaim_inodes(mp); - xfs_health_unmount(mp); + xfs_unmount_flush_inodes(mp); out_log_dealloc: - mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); out_fail_wait: if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) @@ -1063,47 +1101,7 @@ xfs_unmountfs( xfs_rtunmount_inodes(mp); xfs_irele(mp->m_rootip); - /* - * We can potentially deadlock here if we have an inode cluster - * that has been freed has its buffer still pinned in memory because - * the transaction is still sitting in a iclog. The stale inodes - * on that buffer will be pinned to the buffer until the - * transaction hits the disk and the callbacks run. Pushing the AIL will - * skip the stale inodes and may never see the pinned buffer, so - * nothing will push out the iclog and unpin the buffer. Hence we - * need to force the log here to ensure all items are flushed into the - * AIL before we go any further. - */ - xfs_log_force(mp, XFS_LOG_SYNC); - - /* - * Wait for all busy extents to be freed, including completion of - * any discard operation. - */ - xfs_extent_busy_wait_all(mp); - flush_workqueue(xfs_discard_wq); - - /* - * We now need to tell the world we are unmounting. This will allow - * us to detect that the filesystem is going away and we should error - * out anything that we have been retrying in the background. This will - * prevent neverending retries in AIL pushing from hanging the unmount. - */ - mp->m_flags |= XFS_MOUNT_UNMOUNTING; - - /* - * Flush all pending changes from the AIL. - */ - xfs_ail_push_all_sync(mp->m_ail); - - /* - * Reclaim all inodes. At this point there should be no dirty inodes and - * none should be pinned or locked. Stop background inode reclaim here - * if it is still running. - */ - cancel_delayed_work_sync(&mp->m_reclaim_work); - xfs_reclaim_inodes(mp); - xfs_health_unmount(mp); + xfs_unmount_flush_inodes(mp); xfs_qm_unmount(mp); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 1379013d74b8..7f368b10ded1 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -182,7 +182,8 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, + error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns), + fsgid_into_mnt(mnt_userns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) |