summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2014-09-11 09:31:18 -0600
committerJens Axboe <axboe@fb.com>2014-09-11 09:31:18 -0600
commitb207892b061da7608878e273ae22ba9bf9be264b (patch)
tree51daa46b89b83cad422941f52110b19571b85b79 /fs/ext4
parent018a17bdc8658ad448497c84d4ba21b6985820ec (diff)
parenta516440542afcb9647f88d12c35640baf02d07ea (diff)
Merge branch 'for-linus' into for-3.18/core
A bit of churn on the for-linus side that would be nice to have in the core bits for 3.18, so pull it in to catch us up and make forward progress easier. Signed-off-by: Jens Axboe <axboe@fb.com> Conflicts: block/scsi_ioctl.c
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h18
-rw-r--r--fs/ext4/extents.c88
-rw-r--r--fs/ext4/inode.c44
-rw-r--r--fs/ext4/mballoc.c5
-rw-r--r--fs/ext4/namei.c56
-rw-r--r--fs/ext4/super.c5
6 files changed, 136 insertions, 80 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5b19760b1de5..b0c225cdb52c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
/*
* Special error return code only used by dx_probe() and its callers.
*/
-#define ERR_BAD_DX_DIR -75000
+#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
/*
* Timeout and state flag for lazy initialization inode thread.
@@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
up_write(&EXT4_I(inode)->i_data_sem);
}
+/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
+static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
+{
+ int changed = 0;
+
+ if (newsize > inode->i_size) {
+ i_size_write(inode, newsize);
+ changed = 1;
+ }
+ if (newsize > EXT4_I(inode)->i_disksize) {
+ ext4_update_i_disksize(inode, newsize);
+ changed |= 2;
+ }
+ return changed;
+}
+
struct ext4_group_info {
unsigned long bb_state;
struct rb_root bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 76c2df382b7d..74292a71b384 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4665,7 +4665,8 @@ retry:
}
static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
- ext4_lblk_t len, int flags, int mode)
+ ext4_lblk_t len, loff_t new_size,
+ int flags, int mode)
{
struct inode *inode = file_inode(file);
handle_t *handle;
@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
int retries = 0;
struct ext4_map_blocks map;
unsigned int credits;
+ loff_t epos;
map.m_lblk = offset;
+ map.m_len = len;
/*
* Don't normalize the request if it can fit in one extent so
* that it doesn't get unnecessarily split into multiple
@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
credits = ext4_chunk_trans_blocks(inode, len);
retry:
- while (ret >= 0 && ret < len) {
- map.m_lblk = map.m_lblk + ret;
- map.m_len = len = len - ret;
+ while (ret >= 0 && len) {
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
credits);
if (IS_ERR(handle)) {
@@ -4709,6 +4710,21 @@ retry:
ret2 = ext4_journal_stop(handle);
break;
}
+ map.m_lblk += ret;
+ map.m_len = len = len - ret;
+ epos = (loff_t)map.m_lblk << inode->i_blkbits;
+ inode->i_ctime = ext4_current_time(inode);
+ if (new_size) {
+ if (epos > new_size)
+ epos = new_size;
+ if (ext4_update_inode_size(inode, epos) & 0x1)
+ inode->i_mtime = inode->i_ctime;
+ } else {
+ if (epos > inode->i_size)
+ ext4_set_inode_flag(inode,
+ EXT4_INODE_EOFBLOCKS);
+ }
+ ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;
@@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
loff_t new_size = 0;
int ret = 0;
int flags;
- int partial;
+ int credits;
+ int partial_begin, partial_end;
loff_t start, end;
ext4_lblk_t lblk;
struct address_space *mapping = inode->i_mapping;
@@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (start < offset || end > offset + len)
return -EINVAL;
- partial = (offset + len) & ((1 << blkbits) - 1);
+ partial_begin = offset & ((1 << blkbits) - 1);
+ partial_end = (offset + len) & ((1 << blkbits) - 1);
lblk = start >> blkbits;
max_blocks = (end >> blkbits);
@@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* If we have a partial block after EOF we have to allocate
* the entire block.
*/
- if (partial)
+ if (partial_end)
max_blocks += 1;
}
@@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Now release the pages and zero block aligned part of pages*/
truncate_pagecache_range(inode, start, end - 1);
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
@@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (ret)
goto out_dio;
- ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
- mode);
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ flags, mode);
if (ret)
goto out_dio;
}
+ if (!partial_begin && !partial_end)
+ goto out_dio;
- handle = ext4_journal_start(inode, EXT4_HT_MISC, 4);
+ /*
+ * In worst case we have to writeout two nonadjacent unwritten
+ * blocks and update the inode
+ */
+ credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
+ if (ext4_should_journal_data(inode))
+ credits += 2;
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
ext4_std_error(inode->i_sb, ret);
@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-
if (new_size) {
- if (new_size > i_size_read(inode))
- i_size_write(inode, new_size);
- if (new_size > EXT4_I(inode)->i_disksize)
- ext4_update_i_disksize(inode, new_size);
+ ext4_update_inode_size(inode, new_size);
} else {
/*
* Mark that we allocate beyond EOF so the subsequent truncate
@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if ((offset + len) > i_size_read(inode))
ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
}
-
ext4_mark_inode_dirty(handle, inode);
/* Zero out partial block at the edges of the range */
@@ -4880,13 +4903,11 @@ out_mutex:
long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
- handle_t *handle;
loff_t new_size = 0;
unsigned int max_blocks;
int ret = 0;
int flags;
ext4_lblk_t lblk;
- struct timespec tv;
unsigned int blkbits = inode->i_blkbits;
/* Return error if mode is not supported */
@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;
}
- ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ flags, mode);
if (ret)
goto out;
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
- if (IS_ERR(handle))
- goto out;
-
- tv = inode->i_ctime = ext4_current_time(inode);
-
- if (new_size) {
- if (new_size > i_size_read(inode)) {
- i_size_write(inode, new_size);
- inode->i_mtime = tv;
- }
- if (new_size > EXT4_I(inode)->i_disksize)
- ext4_update_i_disksize(inode, new_size);
- } else {
- /*
- * Mark that we allocate beyond EOF so the subsequent truncate
- * can proceed even if the new size is the same as i_size.
- */
- if ((offset + len) > i_size_read(inode))
- ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+ if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
+ ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+ EXT4_I(inode)->i_sync_tid);
}
- ext4_mark_inode_dirty(handle, inode);
- if (file->f_flags & O_SYNC)
- ext4_handle_sync(handle);
-
- ext4_journal_stop(handle);
out:
mutex_unlock(&inode->i_mutex);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 367a60c07cf0..3aa26e9117c4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file,
} else
copied = block_write_end(file, mapping, pos,
len, copied, page, fsdata);
-
/*
- * No need to use i_size_read() here, the i_size
- * cannot change under us because we hole i_mutex.
- *
- * But it's important to update i_size while still holding page lock:
+ * it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
*/
- if (pos + copied > inode->i_size) {
- i_size_write(inode, pos + copied);
- i_size_changed = 1;
- }
-
- if (pos + copied > EXT4_I(inode)->i_disksize) {
- /* We need to mark inode dirty even if
- * new_i_size is less that inode->i_size
- * but greater than i_disksize. (hint delalloc)
- */
- ext4_update_i_disksize(inode, (pos + copied));
- i_size_changed = 1;
- }
+ i_size_changed = ext4_update_inode_size(inode, pos + copied);
unlock_page(page);
page_cache_release(page);
@@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file,
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
- loff_t new_i_size;
+ int size_changed = 0;
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file,
if (!partial)
SetPageUptodate(page);
}
- new_i_size = pos + copied;
- if (new_i_size > inode->i_size)
- i_size_write(inode, pos+copied);
+ size_changed = ext4_update_inode_size(inode, pos + copied);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
- if (new_i_size > EXT4_I(inode)->i_disksize) {
- ext4_update_i_disksize(inode, new_i_size);
+ unlock_page(page);
+ page_cache_release(page);
+
+ if (size_changed) {
ret2 = ext4_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
}
- unlock_page(page);
- page_cache_release(page);
if (pos + len > inode->i_size && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
@@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
struct ext4_map_blocks *map = &mpd->map;
int err;
loff_t disksize;
+ int progress = 0;
mpd->io_submit.io_end->offset =
((loff_t)map->m_lblk) << inode->i_blkbits;
@@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle,
* is non-zero, a commit should free up blocks.
*/
if ((err == -ENOMEM) ||
- (err == -ENOSPC && ext4_count_free_clusters(sb)))
+ (err == -ENOSPC && ext4_count_free_clusters(sb))) {
+ if (progress)
+ goto update_disksize;
return err;
+ }
ext4_msg(sb, KERN_CRIT,
"Delayed block allocation failed for "
"inode %lu at logical offset %llu with"
@@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle,
*give_up_on_write = true;
return err;
}
+ progress = 1;
/*
* Update buffer state, submit mapped pages, and get us new
* extent to map
*/
err = mpage_map_and_submit_buffers(mpd);
if (err < 0)
- return err;
+ goto update_disksize;
} while (map->m_len);
+update_disksize:
/*
* Update on-disk size after IO is submitted. Races with
* truncate are avoided by checking i_size under i_data_sem.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 956027711faf..8b0f9ef517d6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
int last = first + count - 1;
struct super_block *sb = e4b->bd_sb;
+ if (WARN_ON(count == 0))
+ return;
BUG_ON(last >= (sb->s_blocksize << 3));
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
/* Don't bother if the block group is corrupt. */
@@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
int err;
if (pa == NULL) {
+ if (ac->ac_f_ex.fe_len == 0)
+ return;
err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
if (err) {
/*
@@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
ac->ac_f_ex.fe_len);
ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ ext4_mb_unload_buddy(&e4b);
return;
}
if (pa->pa_type == MB_INODE_PA)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b147a67baa0d..90a3cdca3f88 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
buffer */
int num = 0;
ext4_lblk_t nblocks;
- int i, err;
+ int i, err = 0;
int namelen;
*res_dir = NULL;
@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
* return. Otherwise, fall back to doing a search the
* old fashioned way.
*/
- if (bh || (err != ERR_BAD_DX_DIR))
+ if (err == -ENOENT)
+ return NULL;
+ if (err && err != ERR_BAD_DX_DIR)
+ return ERR_PTR(err);
+ if (bh)
return bh;
dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
"falling back\n"));
@@ -1295,6 +1299,11 @@ restart:
}
num++;
bh = ext4_getblk(NULL, dir, b++, 0, &err);
+ if (unlikely(err)) {
+ if (ra_max == 0)
+ return ERR_PTR(err);
+ break;
+ }
bh_use[ra_max] = bh;
if (bh)
ll_rw_block(READ | REQ_META | REQ_PRIO,
@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
return ERR_PTR(-ENAMETOOLONG);
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return (struct dentry *) bh;
inode = NULL;
if (bh) {
__u32 ino = le32_to_cpu(de->inode);
@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
struct buffer_head *bh;
bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
+ if (IS_ERR(bh))
+ return (struct dentry *) bh;
if (!bh)
return ERR_PTR(-ENOENT);
ino = le32_to_cpu(de->inode);
@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (!bh)
goto end_rmdir;
@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (!bh)
goto end_unlink;
@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
struct ext4_dir_entry_2 *de;
bh = ext4_find_entry(dir, d_name, &de, NULL);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (bh) {
retval = ext4_delete_entry(handle, dir, de, bh);
brelse(bh);
@@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
return retval;
}
-static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
+static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
+ int force_reread)
{
int retval;
/*
@@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
ent->de->name_len != ent->dentry->d_name.len ||
strncmp(ent->de->name, ent->dentry->d_name.name,
- ent->de->name_len)) {
+ ent->de->name_len) ||
+ force_reread) {
retval = ext4_find_delete_entry(handle, ent->dir,
&ent->dentry->d_name);
} else {
@@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
.dentry = new_dentry,
.inode = new_dentry->d_inode,
};
+ int force_reread;
int retval;
dquot_initialize(old.dir);
@@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
dquot_initialize(new.inode);
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ if (IS_ERR(old.bh))
+ return PTR_ERR(old.bh);
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
@@ -3214,6 +3238,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined);
+ if (IS_ERR(new.bh)) {
+ retval = PTR_ERR(new.bh);
+ goto end_rename;
+ }
if (new.bh) {
if (!new.inode) {
brelse(new.bh);
@@ -3246,6 +3274,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (retval)
goto end_rename;
}
+ /*
+ * If we're renaming a file within an inline_data dir and adding or
+ * setting the new dirent causes a conversion from inline_data to
+ * extents/blockmap, we need to force the dirent delete code to
+ * re-read the directory, or else we end up trying to delete a dirent
+ * from what is now the extent tree root (or a block map).
+ */
+ force_reread = (new.dir->i_ino == old.dir->i_ino &&
+ ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
if (!new.bh) {
retval = ext4_add_entry(handle, new.dentry, old.inode);
if (retval)
@@ -3256,6 +3293,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (retval)
goto end_rename;
}
+ if (force_reread)
+ force_reread = !ext4_test_inode_flag(new.dir,
+ EXT4_INODE_INLINE_DATA);
/*
* Like most other Unix systems, set the ctime for inodes on a
@@ -3267,7 +3307,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
* ok, that's it
*/
- ext4_rename_delete(handle, &old);
+ ext4_rename_delete(handle, &old, force_reread);
if (new.inode) {
ext4_dec_count(handle, new.inode);
@@ -3330,6 +3370,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
&old.de, &old.inlined);
+ if (IS_ERR(old.bh))
+ return PTR_ERR(old.bh);
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
@@ -3342,6 +3384,10 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined);
+ if (IS_ERR(new.bh)) {
+ retval = PTR_ERR(new.bh);
+ goto end_rename;
+ }
/* RENAME_EXCHANGE case: old *and* new must both exist */
if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32b43ad154b9..0b28b36e7915 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb)
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
- /* journal checksum v2 */
+ /* journal checksum v3 */
compat = 0;
- incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
+ incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
} else {
/* journal checksum v1 */
compat = JBD2_FEATURE_COMPAT_CHECKSUM;
@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
jbd2_journal_clear_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
+ JBD2_FEATURE_INCOMPAT_CSUM_V3 |
JBD2_FEATURE_INCOMPAT_CSUM_V2);
}