summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c18
-rw-r--r--fs/btrfs/compression.c42
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/extent-tree.c6
-rw-r--r--fs/btrfs/extent_io.c9
-rw-r--r--fs/btrfs/file.c35
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/inode.c7
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/qgroup.c16
-rw-r--r--fs/btrfs/reflink.c5
-rw-r--r--fs/btrfs/send.c4
-rw-r--r--fs/btrfs/tree-log.c23
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/zoned.c9
-rw-r--r--fs/btrfs/zoned.h5
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/file.c46
-rw-r--r--fs/cifs/fs_context.c2
-rw-r--r--fs/cifs/misc.c23
-rw-r--r--fs/cifs/smb2ops.c4
-rw-r--r--fs/cifs/smb2pdu.c6
-rw-r--r--fs/dax.c35
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/erofs/zmap.c21
-rw-r--r--fs/f2fs/compress.c55
-rw-r--r--fs/f2fs/data.c39
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/file.c3
-rw-r--r--fs/f2fs/segment.c4
-rw-r--r--fs/hfsplus/extents.c7
-rw-r--r--fs/hugetlbfs/inode.c7
-rw-r--r--fs/io_uring.c33
-rw-r--r--fs/iomap/buffered-io.c4
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/quota/dquot.c6
-rw-r--r--fs/signalfd.c23
-rw-r--r--fs/squashfs/file.c6
-rw-r--r--fs/xfs/libxfs/xfs_fs.h4
-rw-r--r--fs/xfs/scrub/common.c4
-rw-r--r--fs/xfs/xfs_bmap_util.c98
43 files changed, 419 insertions, 220 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b8abccd03e5d..6cc4d4cfe0c2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1244,6 +1244,9 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
lockdep_assert_held(&bdev->bd_mutex);
+ if (!(disk->flags & GENHD_FL_UP))
+ return -ENXIO;
+
rescan:
if (bdev->bd_part_count)
return -EBUSY;
@@ -1298,6 +1301,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
struct gendisk *disk = bdev->bd_disk;
int ret = 0;
+ if (!(disk->flags & GENHD_FL_UP))
+ return -ENXIO;
+
if (!bdev->bd_openers) {
if (!bdev_is_partition(bdev)) {
ret = 0;
@@ -1332,8 +1338,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
whole->bd_part_count++;
mutex_unlock(&whole->bd_mutex);
- if (!(disk->flags & GENHD_FL_UP) ||
- !bdev_nr_sectors(bdev)) {
+ if (!bdev_nr_sectors(bdev)) {
__blkdev_put(whole, mode, 1);
bdput(whole);
return -ENXIO;
@@ -1364,16 +1369,12 @@ struct block_device *blkdev_get_no_open(dev_t dev)
struct block_device *bdev;
struct gendisk *disk;
- down_read(&bdev_lookup_sem);
bdev = bdget(dev);
if (!bdev) {
- up_read(&bdev_lookup_sem);
blk_request_module(dev);
- down_read(&bdev_lookup_sem);
-
bdev = bdget(dev);
if (!bdev)
- goto unlock;
+ return NULL;
}
disk = bdev->bd_disk;
@@ -1383,14 +1384,11 @@ struct block_device *blkdev_get_no_open(dev_t dev)
goto put_disk;
if (!try_module_get(bdev->bd_disk->fops->owner))
goto put_disk;
- up_read(&bdev_lookup_sem);
return bdev;
put_disk:
put_disk(disk);
bdput:
bdput(bdev);
-unlock:
- up_read(&bdev_lookup_sem);
return NULL;
}
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 2bea01d23a5b..d17ac301032e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -28,6 +28,7 @@
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"
+#include "zoned.h"
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
@@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio)
*/
inode = cb->inode;
cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
+ btrfs_record_physical_zoned(inode, cb->start, bio);
btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
cb->start, cb->start + cb->len - 1,
bio->bi_status == BLK_STS_OK);
@@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
u64 first_byte = disk_start;
blk_status_t ret;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
+ const bool use_append = btrfs_use_zone_append(inode, disk_start);
+ const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
WARN_ON(!PAGE_ALIGNED(start));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
cb->nr_pages = nr_pages;
bio = btrfs_bio_alloc(first_byte);
- bio->bi_opf = REQ_OP_WRITE | write_flags;
+ bio->bi_opf = bio_op | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
+ if (use_append) {
+ struct extent_map *em;
+ struct map_lookup *map;
+ struct block_device *bdev;
+
+ em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE);
+ if (IS_ERR(em)) {
+ kfree(cb);
+ bio_put(bio);
+ return BLK_STS_NOTSUPP;
+ }
+
+ map = em->map_lookup;
+ /* We only support single profile for now */
+ ASSERT(map->num_stripes == 1);
+ bdev = map->stripes[0].dev->bdev;
+
+ bio_set_dev(bio, bdev);
+ free_extent_map(em);
+ }
+
if (blkcg_css) {
bio->bi_opf |= REQ_CGROUP_PUNT;
kthread_associate_blkcg(blkcg_css);
@@ -432,6 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
bytes_left = compressed_len;
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
int submit = 0;
+ int len;
page = compressed_pages[pg_index];
page->mapping = inode->vfs_inode.i_mapping;
@@ -439,9 +465,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
0);
+ if (pg_index == 0 && use_append)
+ len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
+ else
+ len = bio_add_page(bio, page, PAGE_SIZE, 0);
+
page->mapping = NULL;
- if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
- PAGE_SIZE) {
+ if (submit || len < PAGE_SIZE) {
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
@@ -465,11 +495,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
}
bio = btrfs_bio_alloc(first_byte);
- bio->bi_opf = REQ_OP_WRITE | write_flags;
+ bio->bi_opf = bio_op | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
if (blkcg_css)
bio->bi_opf |= REQ_CGROUP_PUNT;
+ /*
+ * Use bio_add_page() to ensure the bio has at least one
+ * page.
+ */
bio_add_page(bio, page, PAGE_SIZE, 0);
}
if (bytes_left < PAGE_SIZE) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f83fd3cbf243..9fb76829a281 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3127,7 +3127,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 new_size,
u32 min_type);
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
bool in_reclaim_context);
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7a28314189b4..f1d15b68994a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1340,12 +1340,16 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
stripe = bbio->stripes;
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
u64 bytes;
+ struct btrfs_device *device = stripe->dev;
- if (!stripe->dev->bdev) {
+ if (!device->bdev) {
ASSERT(btrfs_test_opt(fs_info, DEGRADED));
continue;
}
+ if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+ continue;
+
ret = do_discard_extent(stripe, &bytes);
if (!ret) {
discarded_bytes += bytes;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 074a78a202b8..dee2dafbc872 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3753,7 +3753,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
/* Note that em_end from extent_map_end() is exclusive */
iosize = min(em_end, end + 1) - cur;
- if (btrfs_use_zone_append(inode, em))
+ if (btrfs_use_zone_append(inode, em->block_start))
opf = REQ_OP_ZONE_APPEND;
free_extent_map(em);
@@ -5196,7 +5196,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
int ret = 0;
- u64 off = start;
+ u64 off;
u64 max = start + len;
u32 flags = 0;
u32 found_type;
@@ -5231,6 +5231,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto out_free_ulist;
}
+ /*
+ * We can't initialize that to 'start' as this could miss extents due
+ * to extent item merging
+ */
+ off = 0;
start = round_down(start, btrfs_inode_sectorsize(inode));
len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 864c08d08a35..3b10d98b4ebb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
return ret;
}
+static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+ if (btrfs_inode_in_log(inode, fs_info->generation) &&
+ list_empty(&ctx->ordered_extents))
+ return true;
+
+ /*
+ * If we are doing a fast fsync we can not bail out if the inode's
+ * last_trans is <= then the last committed transaction, because we only
+ * update the last_trans of the inode during ordered extent completion,
+ * and for a fast fsync we don't wait for that, we only wait for the
+ * writeback to complete.
+ */
+ if (inode->last_trans <= fs_info->last_trans_committed &&
+ (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
+ list_empty(&ctx->ordered_extents)))
+ return true;
+
+ return false;
+}
+
/*
* fsync call for both files and directories. This logs the inode into
* the tree log instead of forcing full commits whenever possible.
@@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
- /*
- * If we are doing a fast fsync we can not bail out if the inode's
- * last_trans is <= then the last committed transaction, because we only
- * update the last_trans of the inode during ordered extent completion,
- * and for a fast fsync we don't wait for that, we only wait for the
- * writeback to complete.
- */
smp_mb();
- if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
- (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed &&
- (full_sync || list_empty(&ctx.ordered_extents)))) {
+ if (skip_inode_logging(&ctx)) {
/*
* We've had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e54466fc101f..4806295116d8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3949,7 +3949,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
{
struct btrfs_block_group *block_group;
struct rb_node *node;
- int ret;
+ int ret = 0;
btrfs_info(fs_info, "cleaning free space cache v1");
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4af336008b12..33f14573f2ec 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3241,6 +3241,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
run_delayed_iput_locked(fs_info, inode);
+ cond_resched_lock(&fs_info->delayed_iput_lock);
}
spin_unlock(&fs_info->delayed_iput_lock);
}
@@ -7785,7 +7786,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
iomap->bdev = fs_info->fs_devices->latest_bdev;
iomap->length = len;
- if (write && btrfs_use_zone_append(BTRFS_I(inode), em))
+ if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
iomap->flags |= IOMAP_F_ZONE_APPEND;
free_extent_map(em);
@@ -9678,7 +9679,7 @@ out:
return ret;
}
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
{
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
@@ -9691,7 +9692,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
- return start_delalloc_inodes(root, &wbc, true, false);
+ return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
}
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ee1dbabb5d3c..5dc2fd843ae3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -259,6 +259,8 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
if (!fa->flags_valid) {
/* 1 item for the inode */
trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
goto update_flags;
}
@@ -907,7 +909,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
*/
btrfs_drew_read_lock(&root->snapshot_lock);
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
goto out;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 07b0b4218791..6c413bb451a3 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -984,7 +984,7 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
if (pre)
ret = clone_ordered_extent(ordered, 0, pre);
- if (post)
+ if (ret == 0 && post)
ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes,
post);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2319c923c9e6..3ded812f522c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3545,11 +3545,15 @@ static int try_flush_qgroup(struct btrfs_root *root)
struct btrfs_trans_handle *trans;
int ret;
- /* Can't hold an open transaction or we run the risk of deadlocking */
- ASSERT(current->journal_info == NULL ||
- current->journal_info == BTRFS_SEND_TRANS_STUB);
- if (WARN_ON(current->journal_info &&
- current->journal_info != BTRFS_SEND_TRANS_STUB))
+ /*
+ * Can't hold an open transaction or we run the risk of deadlocking,
+ * and can't either be under the context of a send operation (where
+ * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that
+ * would result in a crash when starting a transaction and does not
+ * make sense either (send is a read-only operation).
+ */
+ ASSERT(current->journal_info == NULL);
+ if (WARN_ON(current->journal_info))
return 0;
/*
@@ -3562,7 +3566,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
return 0;
}
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, true);
if (ret < 0)
goto out;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 3928ecc40d7b..d434dc78dadf 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -282,6 +282,11 @@ copy_inline_extent:
out:
if (!ret && !trans) {
/*
+ * Release path before starting a new transaction so we don't
+ * hold locks that would confuse lockdep.
+ */
+ btrfs_release_path(path);
+ /*
* No transaction here means we copied the inline extent into a
* page of the destination inode.
*
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 55741adf9071..bd69db72acc5 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7170,7 +7170,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
int i;
if (root) {
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
@@ -7178,7 +7178,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
for (i = 0; i < sctx->clone_roots_cnt; i++) {
root = sctx->clone_roots[i].root;
- ret = btrfs_start_delalloc_snapshot(root);
+ ret = btrfs_start_delalloc_snapshot(root, false);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f67721d82e5d..326be57f2828 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1858,8 +1858,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
} else if (ret == -EEXIST) {
ret = 0;
- } else {
- BUG(); /* Logic Error */
}
iput(inode);
@@ -6061,7 +6059,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
* (since logging them is pointless, a link count of 0 means they
* will never be accessible).
*/
- if (btrfs_inode_in_log(inode, trans->transid) ||
+ if ((btrfs_inode_in_log(inode, trans->transid) &&
+ list_empty(&ctx->ordered_extents)) ||
inode->vfs_inode.i_nlink == 0) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
@@ -6462,6 +6461,24 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
(!old_dir || old_dir->logged_trans < trans->transid))
return;
+ /*
+ * If we are doing a rename (old_dir is not NULL) from a directory that
+ * was previously logged, make sure the next log attempt on the directory
+ * is not skipped and logs the inode again. This is because the log may
+ * not currently be authoritative for a range including the old
+ * BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY keys, so we want to make
+ * sure after a log replay we do not end up with both the new and old
+ * dentries around (in case the inode is a directory we would have a
+ * directory with two hard links and 2 inode references for different
+ * parents). The next log attempt of old_dir will happen at
+ * btrfs_log_all_parents(), called through btrfs_log_inode_parent()
+ * below, because we have previously set inode->last_unlink_trans to the
+ * current transaction ID, either here or at btrfs_record_unlink_dir() in
+ * case inode is a directory.
+ */
+ if (old_dir)
+ old_dir->logged_trans = 0;
+
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
ctx.logging_new_name = true;
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9a1ead0c4a31..47d27059d064 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1459,7 +1459,7 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
/* Given hole range was invalid (outside of device) */
if (ret == -ERANGE) {
*hole_start += *hole_size;
- *hole_size = false;
+ *hole_size = 0;
return true;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 70b23a0d03b1..1bb8ee97aae0 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
+ if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+ ret = -EIO;
+ goto out;
+ }
+
switch (zone.cond) {
case BLK_ZONE_COND_OFFLINE:
case BLK_ZONE_COND_READONLY:
@@ -1273,7 +1278,7 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans)
spin_unlock(&trans->releasing_ebs_lock);
}
-bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
+bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_group *cache;
@@ -1288,7 +1293,7 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
if (!is_data_inode(&inode->vfs_inode))
return false;
- cache = btrfs_lookup_block_group(fs_info, em->block_start);
+ cache = btrfs_lookup_block_group(fs_info, start);
ASSERT(cache);
if (!cache)
return false;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 5e41a74a9cb2..e55d32595c2c 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -53,7 +53,7 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb);
void btrfs_free_redirty_list(struct btrfs_transaction *trans);
-bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em);
+bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start);
void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
struct bio *bio);
void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered);
@@ -152,8 +152,7 @@ static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb) { }
static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
-static inline bool btrfs_use_zone_append(struct btrfs_inode *inode,
- struct extent_map *em)
+static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
{
return false;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d7ea9c5fe0f8..2ffcb29d5c8f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -133,7 +133,7 @@ struct workqueue_struct *cifsiod_wq;
struct workqueue_struct *decrypt_wq;
struct workqueue_struct *fileinfo_put_wq;
struct workqueue_struct *cifsoplockd_wq;
-struct workqueue_struct *deferredclose_wq;
+struct workqueue_struct *deferredclose_wq;
__u32 cifs_lock_secret;
/*
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d88b4b523dcc..8488d7024462 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1257,8 +1257,7 @@ struct cifsFileInfo {
struct work_struct oplock_break; /* work for oplock breaks */
struct work_struct put; /* work for the final part of _put */
struct delayed_work deferred;
- bool oplock_break_received; /* Flag to indicate oplock break */
- bool deferred_scheduled;
+ bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
};
struct cifs_io_parms {
@@ -1418,6 +1417,7 @@ struct cifsInodeInfo {
struct inode vfs_inode;
struct list_head deferred_closes; /* list of deferred closes */
spinlock_t deferred_lock; /* protection on deferred list */
+ bool lease_granted; /* Flag to indicate whether lease or oplock is granted. */
};
static inline struct cifsInodeInfo *
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6caad100c3f3..379a427f3c2f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -323,8 +323,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->dentry = dget(dentry);
cfile->f_flags = file->f_flags;
cfile->invalidHandle = false;
- cfile->oplock_break_received = false;
- cfile->deferred_scheduled = false;
+ cfile->deferred_close_scheduled = false;
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
@@ -574,21 +573,18 @@ int cifs_open(struct inode *inode, struct file *file)
file->f_op = &cifs_file_direct_ops;
}
- spin_lock(&CIFS_I(inode)->deferred_lock);
/* Get the cached handle as SMB2 close is deferred */
rc = cifs_get_readable_path(tcon, full_path, &cfile);
if (rc == 0) {
if (file->f_flags == cfile->f_flags) {
file->private_data = cfile;
+ spin_lock(&CIFS_I(inode)->deferred_lock);
cifs_del_deferred_close(cfile);
spin_unlock(&CIFS_I(inode)->deferred_lock);
goto out;
} else {
- spin_unlock(&CIFS_I(inode)->deferred_lock);
_cifsFileInfo_put(cfile, true, false);
}
- } else {
- spin_unlock(&CIFS_I(inode)->deferred_lock);
}
if (server->oplocks)
@@ -878,12 +874,8 @@ void smb2_deferred_work_close(struct work_struct *work)
struct cifsFileInfo, deferred.work);
spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
- if (!cfile->deferred_scheduled) {
- spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
- return;
- }
cifs_del_deferred_close(cfile);
- cfile->deferred_scheduled = false;
+ cfile->deferred_close_scheduled = false;
spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
_cifsFileInfo_put(cfile, true, false);
}
@@ -900,19 +892,26 @@ int cifs_close(struct inode *inode, struct file *file)
file->private_data = NULL;
dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
+ cinode->lease_granted &&
dclose) {
if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
inode->i_ctime = inode->i_mtime = current_time(inode);
spin_lock(&cinode->deferred_lock);
cifs_add_deferred_close(cfile, dclose);
- if (cfile->deferred_scheduled) {
- mod_delayed_work(deferredclose_wq,
- &cfile->deferred, cifs_sb->ctx->acregmax);
+ if (cfile->deferred_close_scheduled &&
+ delayed_work_pending(&cfile->deferred)) {
+ /*
+ * If there is no pending work, mod_delayed_work queues new work.
+ * So, Increase the ref count to avoid use-after-free.
+ */
+ if (!mod_delayed_work(deferredclose_wq,
+ &cfile->deferred, cifs_sb->ctx->acregmax))
+ cifsFileInfo_get(cfile);
} else {
/* Deferred close for files */
queue_delayed_work(deferredclose_wq,
&cfile->deferred, cifs_sb->ctx->acregmax);
- cfile->deferred_scheduled = true;
+ cfile->deferred_close_scheduled = true;
spin_unlock(&cinode->deferred_lock);
return 0;
}
@@ -2020,8 +2019,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
continue;
if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
- if ((!open_file->invalidHandle) &&
- (!open_file->oplock_break_received)) {
+ if ((!open_file->invalidHandle)) {
/* found a good file */
/* lock it so it will not be closed on us */
cifsFileInfo_get(open_file);
@@ -4874,14 +4872,20 @@ oplock_break_ack:
}
/*
* When oplock break is received and there are no active
- * file handles but cached, then set the flag oplock_break_received.
+ * file handles but cached, then schedule deferred close immediately.
* So, new open will not use cached handle.
*/
spin_lock(&CIFS_I(inode)->deferred_lock);
is_deferred = cifs_is_deferred_close(cfile, &dclose);
- if (is_deferred && cfile->deferred_scheduled) {
- cfile->oplock_break_received = true;
- mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+ if (is_deferred &&
+ cfile->deferred_close_scheduled &&
+ delayed_work_pending(&cfile->deferred)) {
+ /*
+ * If there is no pending work, mod_delayed_work queues new work.
+ * So, Increase the ref count to avoid use-after-free.
+ */
+ if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
+ cifsFileInfo_get(cfile);
}
spin_unlock(&CIFS_I(inode)->deferred_lock);
_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 5d21cd905315..92d4ab029c91 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -1145,7 +1145,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
/* if iocharset not set then load_nls_default
* is used by caller
*/
- cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
+ cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
break;
case Opt_netbiosname:
memset(ctx->source_rfc1001_name, 0x20,
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 524dbdfb7184..7207a63819cb 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -672,6 +672,11 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
}
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ * As there is no reference count on cifs_deferred_close, pdclose
+ * should not be used outside deferred_lock.
+ */
bool
cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
{
@@ -688,6 +693,9 @@ cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **
return false;
}
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
void
cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *dclose)
{
@@ -707,6 +715,9 @@ cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *
list_add_tail(&dclose->dlist, &CIFS_I(d_inode(cfile->dentry))->deferred_closes);
}
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
void
cifs_del_deferred_close(struct cifsFileInfo *cfile)
{
@@ -738,15 +749,19 @@ void
cifs_close_all_deferred_files(struct cifs_tcon *tcon)
{
struct cifsFileInfo *cfile;
- struct cifsInodeInfo *cinode;
struct list_head *tmp;
spin_lock(&tcon->open_file_lock);
list_for_each(tmp, &tcon->openFileList) {
cfile = list_entry(tmp, struct cifsFileInfo, tlist);
- cinode = CIFS_I(d_inode(cfile->dentry));
- if (delayed_work_pending(&cfile->deferred))
- mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+ if (delayed_work_pending(&cfile->deferred)) {
+ /*
+ * If there is no pending work, mod_delayed_work queues new work.
+ * So, Increase the ref count to avoid use-after-free.
+ */
+ if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
+ cifsFileInfo_get(cfile);
+ }
}
spin_unlock(&tcon->open_file_lock);
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index dd0eb665b680..21ef51d338e0 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1861,6 +1861,8 @@ smb2_copychunk_range(const unsigned int xid,
cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
+ kfree(retbuf);
+ retbuf = NULL;
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
true /* is_fsctl */, (char *)pcchunk,
@@ -3981,6 +3983,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
unsigned int epoch, bool *purge_cache)
{
oplock &= 0xFF;
+ cinode->lease_granted = false;
if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
return;
if (oplock == SMB2_OPLOCK_LEVEL_BATCH) {
@@ -4007,6 +4010,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
unsigned int new_oplock = 0;
oplock &= 0xFF;
+ cinode->lease_granted = true;
if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
return;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a8bf43184773..9f24eb88297a 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3900,10 +3900,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
* Related requests use info from previous read request
* in chain.
*/
- shdr->SessionId = 0xFFFFFFFF;
+ shdr->SessionId = 0xFFFFFFFFFFFFFFFF;
shdr->TreeId = 0xFFFFFFFF;
- req->PersistentFileId = 0xFFFFFFFF;
- req->VolatileFileId = 0xFFFFFFFF;
+ req->PersistentFileId = 0xFFFFFFFFFFFFFFFF;
+ req->VolatileFileId = 0xFFFFFFFFFFFFFFFF;
}
}
if (remaining_bytes > io_parms->length)
diff --git a/fs/dax.c b/fs/dax.c
index 69216241392f..62352cbcf0f4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue {
struct exceptional_entry_key key;
};
+/**
+ * enum dax_wake_mode: waitqueue wakeup behaviour
+ * @WAKE_ALL: wake all waiters in the waitqueue
+ * @WAKE_NEXT: wake only the first waiter in the waitqueue
+ */
+enum dax_wake_mode {
+ WAKE_ALL,
+ WAKE_NEXT,
+};
+
static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
void *entry, struct exceptional_entry_key *key)
{
@@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
* The important information it's conveying is whether the entry at
* this index used to be a PMD entry.
*/
-static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+static void dax_wake_entry(struct xa_state *xas, void *entry,
+ enum dax_wake_mode mode)
{
struct exceptional_entry_key key;
wait_queue_head_t *wq;
@@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
* must be in the waitqueue and the following check will see them.
*/
if (waitqueue_active(wq))
- __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
+ __wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key);
}
/*
@@ -264,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
finish_wait(wq, &ewait.wait);
}
-static void put_unlocked_entry(struct xa_state *xas, void *entry)
+static void put_unlocked_entry(struct xa_state *xas, void *entry,
+ enum dax_wake_mode mode)
{
- /* If we were the only waiter woken, wake the next one */
if (entry && !dax_is_conflict(entry))
- dax_wake_entry(xas, entry, false);
+ dax_wake_entry(xas, entry, mode);
}
/*
@@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
old = xas_store(xas, entry);
xas_unlock_irq(xas);
BUG_ON(!dax_is_locked(old));
- dax_wake_entry(xas, entry, false);
+ dax_wake_entry(xas, entry, WAKE_NEXT);
}
/*
@@ -524,7 +535,7 @@ retry:
dax_disassociate_entry(entry, mapping, false);
xas_store(xas, NULL); /* undo the PMD join */
- dax_wake_entry(xas, entry, true);
+ dax_wake_entry(xas, entry, WAKE_ALL);
mapping->nrpages -= PG_PMD_NR;
entry = NULL;
xas_set(xas, index);
@@ -622,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
entry = get_unlocked_entry(&xas, 0);
if (entry)
page = dax_busy_page(entry);
- put_unlocked_entry(&xas, entry);
+ put_unlocked_entry(&xas, entry, WAKE_NEXT);
if (page)
break;
if (++scanned % XA_CHECK_SCHED)
@@ -664,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
mapping->nrpages -= 1UL << dax_entry_order(entry);
ret = 1;
out:
- put_unlocked_entry(&xas, entry);
+ put_unlocked_entry(&xas, entry, WAKE_ALL);
xas_unlock_irq(&xas);
return ret;
}
@@ -937,13 +948,13 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
xas_lock_irq(xas);
xas_store(xas, entry);
xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
- dax_wake_entry(xas, entry, false);
+ dax_wake_entry(xas, entry, WAKE_NEXT);
trace_dax_writeback_one(mapping->host, index, count);
return ret;
put_unlocked:
- put_unlocked_entry(xas, entry);
+ put_unlocked_entry(xas, entry, WAKE_NEXT);
return ret;
}
@@ -1684,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
/* Did we race with someone splitting entry or so? */
if (!entry || dax_is_conflict(entry) ||
(order == 0 && !dax_is_pte_entry(entry))) {
- put_unlocked_entry(&xas, entry);
+ put_unlocked_entry(&xas, entry, WAKE_NEXT);
xas_unlock_irq(&xas);
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
VM_FAULT_NOPAGE);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 345f8061e3b4..e3f5d7f3c8a0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -296,10 +296,6 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
struct extent_crypt_result ecr;
int rc = 0;
- if (!crypt_stat || !crypt_stat->tfm
- || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
- return -EINVAL;
-
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
crypt_stat->key_size);
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index e62d813756f2..efaf32596b97 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -450,14 +450,31 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
lcn = m->lcn + 1;
if (m->compressedlcs)
goto out;
- if (lcn == initial_lcn)
- goto err_bonus_cblkcnt;
err = z_erofs_load_cluster_from_disk(m, lcn);
if (err)
return err;
+ /*
+ * If the 1st NONHEAD lcluster has already been handled initially w/o
+ * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+ * an internal implemenatation error is detected.
+ *
+ * The following code can also handle it properly anyway, but let's
+ * BUG_ON in the debugging mode only for developers to notice that.
+ */
+ DBG_BUGON(lcn == initial_lcn &&
+ m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+
switch (m->type) {
+ case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+ case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+ /*
+ * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
+ * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+ */
+ m->compressedlcs = 1;
+ break;
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
goto err_bonus_cblkcnt;
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 53b13787eb2c..925a5ca3744a 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -117,19 +117,6 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len)
f2fs_drop_rpages(cc, len, true);
}
-static void f2fs_put_rpages_mapping(struct address_space *mapping,
- pgoff_t start, int len)
-{
- int i;
-
- for (i = 0; i < len; i++) {
- struct page *page = find_get_page(mapping, start + i);
-
- put_page(page);
- put_page(page);
- }
-}
-
static void f2fs_put_rpages_wbc(struct compress_ctx *cc,
struct writeback_control *wbc, bool redirty, int unlock)
{
@@ -158,13 +145,14 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc)
return cc->rpages ? 0 : -ENOMEM;
}
-void f2fs_destroy_compress_ctx(struct compress_ctx *cc)
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
{
page_array_free(cc->inode, cc->rpages, cc->cluster_size);
cc->rpages = NULL;
cc->nr_rpages = 0;
cc->nr_cpages = 0;
- cc->cluster_idx = NULL_CLUSTER;
+ if (!reuse)
+ cc->cluster_idx = NULL_CLUSTER;
}
void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page)
@@ -1036,7 +1024,7 @@ retry:
}
if (PageUptodate(page))
- unlock_page(page);
+ f2fs_put_page(page, 1);
else
f2fs_compress_ctx_add_page(cc, page);
}
@@ -1046,33 +1034,35 @@ retry:
ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
&last_block_in_bio, false, true);
- f2fs_destroy_compress_ctx(cc);
+ f2fs_put_rpages(cc);
+ f2fs_destroy_compress_ctx(cc, true);
if (ret)
- goto release_pages;
+ goto out;
if (bio)
f2fs_submit_bio(sbi, bio, DATA);
ret = f2fs_init_compress_ctx(cc);
if (ret)
- goto release_pages;
+ goto out;
}
for (i = 0; i < cc->cluster_size; i++) {
f2fs_bug_on(sbi, cc->rpages[i]);
page = find_lock_page(mapping, start_idx + i);
- f2fs_bug_on(sbi, !page);
+ if (!page) {
+ /* page can be truncated */
+ goto release_and_retry;
+ }
f2fs_wait_on_page_writeback(page, DATA, true, true);
-
f2fs_compress_ctx_add_page(cc, page);
- f2fs_put_page(page, 0);
if (!PageUptodate(page)) {
+release_and_retry:
+ f2fs_put_rpages(cc);
f2fs_unlock_rpages(cc, i + 1);
- f2fs_put_rpages_mapping(mapping, start_idx,
- cc->cluster_size);
- f2fs_destroy_compress_ctx(cc);
+ f2fs_destroy_compress_ctx(cc, true);
goto retry;
}
}
@@ -1103,10 +1093,10 @@ retry:
}
unlock_pages:
+ f2fs_put_rpages(cc);
f2fs_unlock_rpages(cc, i);
-release_pages:
- f2fs_put_rpages_mapping(mapping, start_idx, i);
- f2fs_destroy_compress_ctx(cc);
+ f2fs_destroy_compress_ctx(cc, true);
+out:
return ret;
}
@@ -1141,7 +1131,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
set_cluster_dirty(&cc);
f2fs_put_rpages_wbc(&cc, NULL, false, 1);
- f2fs_destroy_compress_ctx(&cc);
+ f2fs_destroy_compress_ctx(&cc, false);
return first_index;
}
@@ -1361,7 +1351,7 @@ unlock_continue:
f2fs_put_rpages(cc);
page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
- f2fs_destroy_compress_ctx(cc);
+ f2fs_destroy_compress_ctx(cc, false);
return 0;
out_destroy_crypt:
@@ -1372,7 +1362,8 @@ out_destroy_crypt:
for (i = 0; i < cc->nr_cpages; i++) {
if (!cc->cpages[i])
continue;
- f2fs_put_page(cc->cpages[i], 1);
+ f2fs_compress_free_page(cc->cpages[i]);
+ cc->cpages[i] = NULL;
}
out_put_cic:
kmem_cache_free(cic_entry_slab, cic);
@@ -1522,7 +1513,7 @@ write:
err = f2fs_write_raw_pages(cc, submitted, wbc, io_type);
f2fs_put_rpages_wbc(cc, wbc, false, 0);
destroy_out:
- f2fs_destroy_compress_ctx(cc);
+ f2fs_destroy_compress_ctx(cc, false);
return err;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 96f1a354f89f..009a09fb9d88 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2287,7 +2287,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
max_nr_pages,
&last_block_in_bio,
rac != NULL, false);
- f2fs_destroy_compress_ctx(&cc);
+ f2fs_destroy_compress_ctx(&cc, false);
if (ret)
goto set_error_page;
}
@@ -2332,7 +2332,7 @@ next_page:
max_nr_pages,
&last_block_in_bio,
rac != NULL, false);
- f2fs_destroy_compress_ctx(&cc);
+ f2fs_destroy_compress_ctx(&cc, false);
}
}
#endif
@@ -3033,7 +3033,7 @@ next:
}
}
if (f2fs_compressed_file(inode))
- f2fs_destroy_compress_ctx(&cc);
+ f2fs_destroy_compress_ctx(&cc, false);
#endif
if (retry) {
index = 0;
@@ -3801,6 +3801,7 @@ static int f2fs_is_file_aligned(struct inode *inode)
block_t pblock;
unsigned long nr_pblocks;
unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+ unsigned int not_aligned = 0;
int ret = 0;
cur_lblock = 0;
@@ -3833,13 +3834,20 @@ static int f2fs_is_file_aligned(struct inode *inode)
if ((pblock - main_blkaddr) & (blocks_per_sec - 1) ||
nr_pblocks & (blocks_per_sec - 1)) {
- f2fs_err(sbi, "Swapfile does not align to section");
- ret = -EINVAL;
- goto out;
+ if (f2fs_is_pinned_file(inode)) {
+ f2fs_err(sbi, "Swapfile does not align to section");
+ ret = -EINVAL;
+ goto out;
+ }
+ not_aligned++;
}
cur_lblock += nr_pblocks;
}
+ if (not_aligned)
+ f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n"
+ "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()",
+ not_aligned);
out:
return ret;
}
@@ -3858,6 +3866,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
int nr_extents = 0;
unsigned long nr_pblocks;
unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+ unsigned int not_aligned = 0;
int ret = 0;
/*
@@ -3887,7 +3896,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
/* hole */
if (!(map.m_flags & F2FS_MAP_FLAGS)) {
f2fs_err(sbi, "Swapfile has holes\n");
- ret = -ENOENT;
+ ret = -EINVAL;
goto out;
}
@@ -3896,9 +3905,12 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) ||
nr_pblocks & (blocks_per_sec - 1)) {
- f2fs_err(sbi, "Swapfile does not align to section");
- ret = -EINVAL;
- goto out;
+ if (f2fs_is_pinned_file(inode)) {
+ f2fs_err(sbi, "Swapfile does not align to section");
+ ret = -EINVAL;
+ goto out;
+ }
+ not_aligned++;
}
if (cur_lblock + nr_pblocks >= sis->max)
@@ -3927,6 +3939,11 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
sis->max = cur_lblock;
sis->pages = cur_lblock - 1;
sis->highest_bit = cur_lblock - 1;
+
+ if (not_aligned)
+ f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n"
+ "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()",
+ not_aligned);
out:
return ret;
}
@@ -4035,7 +4052,7 @@ out:
return ret;
bad_bmap:
f2fs_err(sbi, "Swapfile has holes\n");
- return -ENOENT;
+ return -EINVAL;
}
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 044878866ca3..c83d90125ebd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3956,7 +3956,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
void f2fs_put_page_dic(struct page *page);
int f2fs_init_compress_ctx(struct compress_ctx *cc);
-void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 44a4650aea7b..ceb575f99048 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1817,7 +1817,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
struct f2fs_inode_info *fi = F2FS_I(inode);
u32 masked_flags = fi->i_flags & mask;
- f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask));
+ /* mask can be shrunk by flags_valid selector */
+ iflags &= mask;
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c605415840b5..51dc79fad4fe 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3574,12 +3574,12 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
return err;
drop_bio:
- if (fio->bio) {
+ if (fio->bio && *(fio->bio)) {
struct bio *bio = *(fio->bio);
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
- fio->bio = NULL;
+ *(fio->bio) = NULL;
}
return err;
}
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index a930ddd15681..7054a542689f 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -598,13 +598,15 @@ void hfsplus_file_truncate(struct inode *inode)
res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
if (res)
break;
- hfs_brec_remove(&fd);
- mutex_unlock(&fd.tree->tree_lock);
start = hip->cached_start;
+ if (blk_cnt <= start)
+ hfs_brec_remove(&fd);
+ mutex_unlock(&fd.tree->tree_lock);
hfsplus_free_extents(sb, hip->cached_extents,
alloc_cnt - start, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->cached_extents);
+ mutex_lock(&fd.tree->tree_lock);
if (blk_cnt > start) {
hip->extent_state |= HFSPLUS_EXT_DIRTY;
break;
@@ -612,7 +614,6 @@ void hfsplus_file_truncate(struct inode *inode)
alloc_cnt = start;
hip->cached_start = hip->cached_blocks = 0;
hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
- mutex_lock(&fd.tree->tree_lock);
}
hfs_find_exit(&fd);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2a42335e8fd..55efd3dd04f6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -131,6 +131,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
+ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
@@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;
+ ret = seal_check_future_write(info->seals, vma);
+ if (ret)
+ return ret;
+
/*
* page based offset in vm_pgoff could be sufficiently large to
* overflow a loff_t when converted to byte offset. This can
@@ -524,7 +529,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
* the subpool and global reserve usage count can need
* to be adjusted.
*/
- VM_BUG_ON(PagePrivate(page));
+ VM_BUG_ON(HPageRestoreReserve(page));
remove_huge_page(page);
freed++;
if (!truncate_op) {
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f46acbbeed57..5f82954004f6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -100,6 +100,8 @@
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
+#define IORING_MAX_REG_BUFFERS (1U << 14)
+
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
IOSQE_BUFFER_SELECT)
@@ -4035,7 +4037,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
#if defined(CONFIG_EPOLL)
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
- if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -4150,7 +4152,7 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
@@ -5017,10 +5019,10 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
* Can't handle multishot for double wait for now, turn it
* into one-shot mode.
*/
- if (!(req->poll.events & EPOLLONESHOT))
- req->poll.events |= EPOLLONESHOT;
+ if (!(poll_one->events & EPOLLONESHOT))
+ poll_one->events |= EPOLLONESHOT;
/* double add on the same waitqueue head, ignore */
- if (poll->head == head)
+ if (poll_one->head == head)
return;
poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
if (!poll) {
@@ -5827,8 +5829,6 @@ done:
static int io_rsrc_update_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
- return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
if (sqe->ioprio || sqe->rw_flags)
@@ -6354,19 +6354,20 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
* We don't expect the list to be empty, that will only happen if we
* race with the completion of the linked work.
*/
- if (prev && req_ref_inc_not_zero(prev))
+ if (prev) {
io_remove_next_linked(prev);
- else
- prev = NULL;
+ if (!req_ref_inc_not_zero(prev))
+ prev = NULL;
+ }
spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (prev) {
io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
io_put_req_deferred(prev, 1);
+ io_put_req_deferred(req, 1);
} else {
io_req_complete_post(req, -ETIME, 0);
}
- io_put_req_deferred(req, 1);
return HRTIMER_NORESTART;
}
@@ -8390,7 +8391,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
if (ctx->user_bufs)
return -EBUSY;
- if (!nr_args || nr_args > UIO_MAXIOV)
+ if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
return -EINVAL;
ret = io_rsrc_node_switch_start(ctx);
if (ret)
@@ -9034,15 +9035,15 @@ static void io_uring_del_task_file(unsigned long index)
static void io_uring_clean_tctx(struct io_uring_task *tctx)
{
+ struct io_wq *wq = tctx->io_wq;
struct io_tctx_node *node;
unsigned long index;
+ tctx->io_wq = NULL;
xa_for_each(&tctx->xa, index, node)
io_uring_del_task_file(index);
- if (tctx->io_wq) {
- io_wq_put_and_exit(tctx->io_wq);
- tctx->io_wq = NULL;
- }
+ if (wq)
+ io_wq_put_and_exit(wq);
}
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index f2cd2034a87b..9023717c5188 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -394,7 +394,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
{
struct inode *inode = rac->mapping->host;
loff_t pos = readahead_pos(rac);
- loff_t length = readahead_length(rac);
+ size_t length = readahead_length(rac);
struct iomap_readpage_ctx ctx = {
.rac = rac,
};
@@ -402,7 +402,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
trace_iomap_readahead(inode, readahead_count(rac));
while (length > 0) {
- loff_t ret = iomap_apply(inode, pos, length, 0, ops,
+ ssize_t ret = iomap_apply(inode, pos, length, 0, ops,
&ctx, iomap_readahead_actor);
if (ret <= 0) {
WARN_ON_ONCE(ret == 0);
diff --git a/fs/namespace.c b/fs/namespace.c
index f63337828e1c..c3f1a78ba369 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3855,8 +3855,12 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
return -EINVAL;
+ /* Don't yet support filesystem mountable in user namespaces. */
+ if (m->mnt_sb->s_user_ns != &init_user_ns)
+ return -EINVAL;
+
/* We're not controlling the superblock. */
- if (!ns_capable(m->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+ if (!capable(CAP_SYS_ADMIN))
return -EPERM;
/* Mount has already been visible in the filesystem hierarchy. */
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 4f1373463766..22d904bde6ab 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -288,14 +288,12 @@ static inline void remove_dquot_hash(struct dquot *dquot)
static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
struct kqid qid)
{
- struct hlist_node *node;
struct dquot *dquot;
- hlist_for_each (node, dquot_hash+hashent) {
- dquot = hlist_entry(node, struct dquot, dq_hash);
+ hlist_for_each_entry(dquot, dquot_hash+hashent, dq_hash)
if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid))
return dquot;
- }
+
return NULL;
}
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 040a1142915f..167b5889db4b 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -114,29 +114,24 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
break;
case SIL_FAULT_BNDERR:
case SIL_FAULT_PKUERR:
+ case SIL_PERF_EVENT:
/*
- * Fall through to the SIL_FAULT case. Both SIL_FAULT_BNDERR
- * and SIL_FAULT_PKUERR are only generated by faults that
- * deliver them synchronously to userspace. In case someone
- * injects one of these signals and signalfd catches it treat
- * it as SIL_FAULT.
+ * Fall through to the SIL_FAULT case. SIL_FAULT_BNDERR,
+ * SIL_FAULT_PKUERR, and SIL_PERF_EVENT are only
+ * generated by faults that deliver them synchronously to
+ * userspace. In case someone injects one of these signals
+ * and signalfd catches it treat it as SIL_FAULT.
*/
case SIL_FAULT:
new.ssi_addr = (long) kinfo->si_addr;
-#ifdef __ARCH_SI_TRAPNO
- new.ssi_trapno = kinfo->si_trapno;
-#endif
break;
- case SIL_FAULT_MCEERR:
+ case SIL_FAULT_TRAPNO:
new.ssi_addr = (long) kinfo->si_addr;
-#ifdef __ARCH_SI_TRAPNO
new.ssi_trapno = kinfo->si_trapno;
-#endif
- new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
break;
- case SIL_PERF_EVENT:
+ case SIL_FAULT_MCEERR:
new.ssi_addr = (long) kinfo->si_addr;
- new.ssi_perf = kinfo->si_perf;
+ new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
break;
case SIL_CHLD:
new.ssi_pid = kinfo->si_pid;
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 7b1128398976..89d492916dea 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -211,11 +211,11 @@ failure:
* If the skip factor is limited in this way then the file will use multiple
* slots.
*/
-static inline int calculate_skip(int blocks)
+static inline int calculate_skip(u64 blocks)
{
- int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+ u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
* SQUASHFS_META_INDEXES);
- return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
+ return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
}
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index a83bdd0c47a8..bde2b4c64dbe 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -770,6 +770,8 @@ struct xfs_scrub_metadata {
/*
* ioctl commands that are used by Linux filesystems
*/
+#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
+#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS
#define XFS_IOC_GETVERSION FS_IOC_GETVERSION
/*
@@ -780,6 +782,8 @@ struct xfs_scrub_metadata {
#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr)
+#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap)
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index aa874607618a..be38c960da85 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -74,7 +74,9 @@ __xchk_process_error(
return true;
case -EDEADLOCK:
/* Used to restart an op with deadlock avoidance. */
- trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
+ trace_xchk_deadlock_retry(
+ sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
+ sc->sm, *error);
break;
case -EFSBADCRC:
case -EFSCORRUPTED:
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a5e9d7d34023..0936f3a96fe6 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -71,18 +71,24 @@ xfs_zero_extent(
#ifdef CONFIG_XFS_RT
int
xfs_bmap_rtalloc(
- struct xfs_bmalloca *ap) /* bmap alloc argument struct */
+ struct xfs_bmalloca *ap)
{
- int error; /* error return value */
- xfs_mount_t *mp; /* mount point structure */
- xfs_extlen_t prod = 0; /* product factor for allocators */
- xfs_extlen_t mod = 0; /* product factor for allocators */
- xfs_extlen_t ralen = 0; /* realtime allocation length */
- xfs_extlen_t align; /* minimum allocation alignment */
- xfs_rtblock_t rtb;
-
- mp = ap->ip->i_mount;
+ struct xfs_mount *mp = ap->ip->i_mount;
+ xfs_fileoff_t orig_offset = ap->offset;
+ xfs_rtblock_t rtb;
+ xfs_extlen_t prod = 0; /* product factor for allocators */
+ xfs_extlen_t mod = 0; /* product factor for allocators */
+ xfs_extlen_t ralen = 0; /* realtime allocation length */
+ xfs_extlen_t align; /* minimum allocation alignment */
+ xfs_extlen_t orig_length = ap->length;
+ xfs_extlen_t minlen = mp->m_sb.sb_rextsize;
+ xfs_extlen_t raminlen;
+ bool rtlocked = false;
+ bool ignore_locality = false;
+ int error;
+
align = xfs_get_extsz_hint(ap->ip);
+retry:
prod = align / mp->m_sb.sb_rextsize;
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
align, 1, ap->eof, 0,
@@ -93,6 +99,15 @@ xfs_bmap_rtalloc(
ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
/*
+ * If we shifted the file offset downward to satisfy an extent size
+ * hint, increase minlen by that amount so that the allocator won't
+ * give us an allocation that's too short to cover at least one of the
+ * blocks that the caller asked for.
+ */
+ if (ap->offset != orig_offset)
+ minlen += orig_offset - ap->offset;
+
+ /*
* If the offset & length are not perfectly aligned
* then kill prod, it will just get us in trouble.
*/
@@ -116,10 +131,13 @@ xfs_bmap_rtalloc(
/*
* Lock out modifications to both the RT bitmap and summary inodes
*/
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
- xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
- xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
- xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
+ if (!rtlocked) {
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
+ xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
+ xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
+ rtlocked = true;
+ }
/*
* If it's an allocation to an empty file at offset 0,
@@ -141,33 +159,59 @@ xfs_bmap_rtalloc(
/*
* Realtime allocation, done through xfs_rtallocate_extent.
*/
- do_div(ap->blkno, mp->m_sb.sb_rextsize);
+ if (ignore_locality)
+ ap->blkno = 0;
+ else
+ do_div(ap->blkno, mp->m_sb.sb_rextsize);
rtb = ap->blkno;
ap->length = ralen;
- error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
- &ralen, ap->wasdel, prod, &rtb);
+ raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize);
+ error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length,
+ &ralen, ap->wasdel, prod, &rtb);
if (error)
return error;
- ap->blkno = rtb;
- if (ap->blkno != NULLFSBLOCK) {
- ap->blkno *= mp->m_sb.sb_rextsize;
- ralen *= mp->m_sb.sb_rextsize;
- ap->length = ralen;
- ap->ip->i_nblocks += ralen;
+ if (rtb != NULLRTBLOCK) {
+ ap->blkno = rtb * mp->m_sb.sb_rextsize;
+ ap->length = ralen * mp->m_sb.sb_rextsize;
+ ap->ip->i_nblocks += ap->length;
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
if (ap->wasdel)
- ap->ip->i_delayed_blks -= ralen;
+ ap->ip->i_delayed_blks -= ap->length;
/*
* Adjust the disk quota also. This was reserved
* earlier.
*/
xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
- XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
- } else {
- ap->length = 0;
+ XFS_TRANS_DQ_RTBCOUNT, ap->length);
+ return 0;
}
+
+ if (align > mp->m_sb.sb_rextsize) {
+ /*
+ * We previously enlarged the request length to try to satisfy
+ * an extent size hint. The allocator didn't return anything,
+ * so reset the parameters to the original values and try again
+ * without alignment criteria.
+ */
+ ap->offset = orig_offset;
+ ap->length = orig_length;
+ minlen = align = mp->m_sb.sb_rextsize;
+ goto retry;
+ }
+
+ if (!ignore_locality && ap->blkno != 0) {
+ /*
+ * If we can't allocate near a specific rt extent, try again
+ * without locality criteria.
+ */
+ ignore_locality = true;
+ goto retry;
+ }
+
+ ap->blkno = NULLFSBLOCK;
+ ap->length = 0;
return 0;
}
#endif /* CONFIG_XFS_RT */