diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-20 18:24:39 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-20 18:24:39 -0800 |
commit | cab7076a185e1e27f6879325e4da762424c3f1c9 (patch) | |
tree | eb9274ba3b306bce14c581368579677775814088 /fs/ext4 | |
parent | 6c24337f22115d669e24ce990842dab667371b4d (diff) | |
parent | e9be2ac7c09cabcbbbb12b0869e49b7a715d6fb5 (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"For this cycle we add support for the shutdown ioctl, which is
primarily used for testing, but which can be useful on production
systems when a scratch volume is being destroyed and the data on it
doesn't need to be saved.
This found (and we fixed) a number of bugs with ext4's recovery to
corrupted file system --- the bugs increased the amount of data that
could be potentially lost, and in the case of the inline data feature,
could cause the kernel to BUG.
Also included are a number of other bug fixes, including in ext4's
fscrypt, DAX, inline data support"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (26 commits)
ext4: rename EXT4_IOC_GOINGDOWN to EXT4_IOC_SHUTDOWN
ext4: fix fencepost in s_first_meta_bg validation
ext4: don't BUG when truncating encrypted inodes on the orphan list
ext4: do not use stripe_width if it is not set
ext4: fix stripe-unaligned allocations
dax: assert that i_rwsem is held exclusive for writes
ext4: fix DAX write locking
ext4: add EXT4_IOC_GOINGDOWN ioctl
ext4: add shutdown bit and check for it
ext4: rename s_resize_flags to s_ext4_flags
ext4: return EROFS if device is r/o and journal replay is needed
ext4: preserve the needs_recovery flag when the journal is aborted
jbd2: don't leak modified metadata buffers on an aborted journal
ext4: fix inline data error paths
ext4: move halfmd4 into hash.c directly
ext4: fix use-after-iput when fscrypt contexts are inconsistent
jbd2: fix use after free in kjournald2()
ext4: fix data corruption in data=journal mode
ext4: trim allocation requests to group size
ext4: replace BUG_ON with WARN_ON in mb_find_extent()
...
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 28 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 11 | ||||
-rw-r--r-- | fs/ext4/extents.c | 27 | ||||
-rw-r--r-- | fs/ext4/file.c | 22 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 3 | ||||
-rw-r--r-- | fs/ext4/hash.c | 71 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 3 | ||||
-rw-r--r-- | fs/ext4/inline.c | 123 | ||||
-rw-r--r-- | fs/ext4/inode.c | 79 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 50 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 25 | ||||
-rw-r--r-- | fs/ext4/namei.c | 18 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 2 | ||||
-rw-r--r-- | fs/ext4/resize.c | 5 | ||||
-rw-r--r-- | fs/ext4/super.c | 47 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 33 | ||||
-rw-r--r-- | fs/ext4/xattr.h | 32 |
17 files changed, 446 insertions, 133 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a5c756b80b7d..01d52b98f9a7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -683,6 +683,16 @@ struct fsxattr { #define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR #define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR +#define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32) + +/* + * Flags for going down operation + */ +#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */ +#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ +#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ + + #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* * ioctl commands in 32 bit emulation @@ -1403,8 +1413,7 @@ struct ext4_sb_info { struct journal_s *s_journal; struct list_head s_orphan; struct mutex s_orphan_lock; - unsigned long s_resize_flags; /* Flags indicating if there - is a resizer */ + unsigned long s_ext4_flags; /* Ext4 superblock flags */ unsigned long s_commit_interval; u32 s_max_batch_time; u32 s_min_batch_time; @@ -1838,6 +1847,18 @@ static inline bool ext4_has_incompat_features(struct super_block *sb) } /* + * Superblock flags + */ +#define EXT4_FLAGS_RESIZING 0 +#define EXT4_FLAGS_SHUTDOWN 1 + +static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi) +{ + return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); +} + + +/* * Default values for user and/or group using reserved blocks */ #define EXT4_DEF_RESUID 0 @@ -3005,7 +3026,7 @@ extern int ext4_inline_data_fiemap(struct inode *inode, extern int ext4_try_to_evict_inline_data(handle_t *handle, struct inode *inode, int needed); -extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline); +extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline); extern int ext4_convert_inline_data(struct inode *inode); @@ -3199,7 +3220,6 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) EXT4_WQ_HASH_SZ]) extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; -#define EXT4_RESIZING 0 extern int ext4_resize_begin(struct super_block *sb); extern void ext4_resize_end(struct super_block *sb); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index e770c1ee4613..dd106b1d5d89 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -43,6 +43,10 @@ static int ext4_journal_check_start(struct super_block *sb) journal_t *journal; might_sleep(); + + if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + return -EIO; + if (sb->s_flags & MS_RDONLY) return -EROFS; WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); @@ -161,6 +165,13 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, might_sleep(); if (ext4_handle_valid(handle)) { + struct super_block *sb; + + sb = handle->h_transaction->t_journal->j_private; + if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) { + jbd2_journal_abort_handle(handle); + return -EIO; + } err = jbd2_journal_get_write_access(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3e295d3350a9..2a97dff87b96 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5334,7 +5334,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_lblk_t stop, *iterator, ex_start, ex_end; /* Let path point to the last extent */ - path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); + path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); @@ -5343,15 +5344,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, if (!extent) goto out; - stop = le32_to_cpu(extent->ee_block) + - ext4_ext_get_actual_len(extent); + stop = le32_to_cpu(extent->ee_block); /* * In case of left shift, Don't start shifting extents until we make * sure the hole is big enough to accommodate the shift. */ if (SHIFT == SHIFT_LEFT) { - path = ext4_find_extent(inode, start - 1, &path, 0); + path = ext4_find_extent(inode, start - 1, &path, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; @@ -5383,9 +5384,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, else iterator = &stop; - /* Its safe to start updating extents */ - while (start < stop) { - path = ext4_find_extent(inode, *iterator, &path, 0); + /* + * Its safe to start updating extents. Start and stop are unsigned, so + * in case of right shift if extent with 0 block is reached, iterator + * becomes NULL to indicate the end of the loop. + */ + while (iterator && start <= stop) { + path = ext4_find_extent(inode, *iterator, &path, + EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; @@ -5412,8 +5418,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - *iterator = le32_to_cpu(extent->ee_block) > 0 ? - le32_to_cpu(extent->ee_block) - 1 : 0; + if (le32_to_cpu(extent->ee_block) > 0) + *iterator = le32_to_cpu(extent->ee_block) - 1; + else + /* Beginning is reached, end of the loop */ + iterator = NULL; /* Update path extent in case we need to stop */ while (le32_to_cpu(extent->ee_block) < start) extent++; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d663d3d7c81c..87e11dfe3cde 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -57,6 +57,9 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { + if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb)))) + return -EIO; + if (!iov_iter_count(to)) return 0; /* skip atime */ @@ -175,7 +178,6 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - bool overwrite = false; inode_lock(inode); ret = ext4_write_checks(iocb, from); @@ -188,16 +190,9 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out; - if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) { - overwrite = true; - downgrade_write(&inode->i_rwsem); - } ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); out: - if (!overwrite) - inode_unlock(inode); - else - inode_unlock_shared(inode); + inode_unlock(inode); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; @@ -213,6 +208,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) int overwrite = 0; ssize_t ret; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + #ifdef CONFIG_FS_DAX if (IS_DAX(inode)) return ext4_dax_write_iter(iocb, from); @@ -348,6 +346,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_mapping->host; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + if (ext4_encrypted_inode(inode)) { int err = fscrypt_get_encryption_info(inode); if (err) @@ -375,6 +376,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp) char buf[64], *cp; int ret; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && !(sb->s_flags & MS_RDONLY))) { sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 88effb1053c7..9d549608fd30 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -100,6 +100,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) tid_t commit_tid; bool needs_barrier = false; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + J_ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file_enter(file, datasync); diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index e026aa941fd5..38b8a96eb97c 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -10,7 +10,8 @@ */ #include <linux/fs.h> -#include <linux/cryptohash.h> +#include <linux/compiler.h> +#include <linux/bitops.h> #include "ext4.h" #define DELTA 0x9E3779B9 @@ -32,6 +33,74 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) buf[1] += b1; } +/* F, G and H are basic MD4 functions: selection, majority, parity */ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + +/* + * The generic round function. The application is so specific that + * we don't bother protecting all the arguments with parens, as is generally + * good macro practice, in favor of extra legibility. + * Rotation is separate from addition to prevent recomputation + */ +#define ROUND(f, a, b, c, d, x, s) \ + (a += f(b, c, d) + x, a = rol32(a, s)) +#define K1 0 +#define K2 013240474631UL +#define K3 015666365641UL + +/* + * Basic cut-down MD4 transform. Returns only 32 bits of result. + */ +static __u32 half_md4_transform(__u32 buf[4], __u32 const in[8]) +{ + __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; + + /* Round 1 */ + ROUND(F, a, b, c, d, in[0] + K1, 3); + ROUND(F, d, a, b, c, in[1] + K1, 7); + ROUND(F, c, d, a, b, in[2] + K1, 11); + ROUND(F, b, c, d, a, in[3] + K1, 19); + ROUND(F, a, b, c, d, in[4] + K1, 3); + ROUND(F, d, a, b, c, in[5] + K1, 7); + ROUND(F, c, d, a, b, in[6] + K1, 11); + ROUND(F, b, c, d, a, in[7] + K1, 19); + + /* Round 2 */ + ROUND(G, a, b, c, d, in[1] + K2, 3); + ROUND(G, d, a, b, c, in[3] + K2, 5); + ROUND(G, c, d, a, b, in[5] + K2, 9); + ROUND(G, b, c, d, a, in[7] + K2, 13); + ROUND(G, a, b, c, d, in[0] + K2, 3); + ROUND(G, d, a, b, c, in[2] + K2, 5); + ROUND(G, c, d, a, b, in[4] + K2, 9); + ROUND(G, b, c, d, a, in[6] + K2, 13); + + /* Round 3 */ + ROUND(H, a, b, c, d, in[3] + K3, 3); + ROUND(H, d, a, b, c, in[7] + K3, 9); + ROUND(H, c, d, a, b, in[2] + K3, 11); + ROUND(H, b, c, d, a, in[6] + K3, 15); + ROUND(H, a, b, c, d, in[1] + K3, 3); + ROUND(H, d, a, b, c, in[5] + K3, 9); + ROUND(H, c, d, a, b, in[0] + K3, 11); + ROUND(H, b, c, d, a, in[4] + K3, 15); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; + + return buf[1]; /* "most hashed" word */ +} +#undef ROUND +#undef K1 +#undef K2 +#undef K3 +#undef F +#undef G +#undef H /* The old legacy hash */ static __u32 dx_hack_hash_unsigned(const char *name, int len) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f372fc431b8e..b14bae2598bc 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -764,6 +764,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (!dir || !dir->i_nlink) return ERR_PTR(-EPERM); + if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) + return ERR_PTR(-EIO); + if ((ext4_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 437df6a1a841..30a9f210d1e3 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -215,6 +215,9 @@ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, struct ext4_inode *raw_inode; int cp_len = 0; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return; + BUG_ON(!EXT4_I(inode)->i_inline_off); BUG_ON(pos + len > EXT4_I(inode)->i_inline_size); @@ -381,7 +384,7 @@ out: static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, unsigned int len) { - int ret, size; + int ret, size, no_expand; struct ext4_inode_info *ei = EXT4_I(inode); if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) @@ -391,15 +394,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, if (size < len) return -ENOSPC; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (ei->i_inline_off) ret = ext4_update_inline_data(handle, inode, len); else ret = ext4_create_inline_data(handle, inode, len); - up_write(&EXT4_I(inode)->xattr_sem); - + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -533,7 +535,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, struct inode *inode, unsigned flags) { - int ret, needed_blocks; + int ret, needed_blocks, no_expand; handle_t *handle = NULL; int retries = 0, sem_held = 0; struct page *page = NULL; @@ -573,7 +575,7 @@ retry: goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); sem_held = 1; /* If some one has already done this for us, just exit. */ if (!ext4_has_inline_data(inode)) { @@ -610,7 +612,7 @@ retry: put_page(page); page = NULL; ext4_orphan_add(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); sem_held = 0; ext4_journal_stop(handle); handle = NULL; @@ -636,7 +638,7 @@ out: put_page(page); } if (sem_held) - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); if (handle) ext4_journal_stop(handle); brelse(iloc.bh); @@ -729,7 +731,7 @@ convert: int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -747,7 +749,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); BUG_ON(!ext4_has_inline_data(inode)); kaddr = kmap_atomic(page); @@ -757,7 +759,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, /* clear page dirty so that writepages wouldn't work for us. */ ClearPageDirty(page); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); out: return copied; @@ -768,7 +770,7 @@ ext4_journalled_write_inline_data(struct inode *inode, unsigned len, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -778,11 +780,11 @@ ext4_journalled_write_inline_data(struct inode *inode, return NULL; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); kaddr = kmap_atomic(page); ext4_write_inline_data(inode, &iloc, kaddr, 0, len); kunmap_atomic(kaddr); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return iloc.bh; } @@ -944,8 +946,15 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, struct page *page) { int i_size_changed = 0; + int ret; - copied = ext4_write_inline_data_end(inode, pos, len, copied, page); + ret = ext4_write_inline_data_end(inode, pos, len, copied, page); + if (ret < 0) { + unlock_page(page); + put_page(page); + return ret; + } + copied = ret; /* * No need to use i_size_read() here, the i_size @@ -1043,7 +1052,6 @@ static int ext4_add_dirent_to_inline(handle_t *handle, dir->i_mtime = dir->i_ctime = current_time(dir); ext4_update_dx_flag(dir); dir->i_version++; - ext4_mark_inode_dirty(handle, dir); return 1; } @@ -1259,7 +1267,7 @@ out: int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode) { - int ret, inline_size; + int ret, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; @@ -1267,7 +1275,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, if (ret) return ret; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) goto out; @@ -1312,8 +1320,8 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, ret = ext4_convert_inline_data_nolock(handle, dir, &iloc); out: + ext4_write_unlock_xattr(dir, &no_expand); ext4_mark_inode_dirty(handle, dir); - up_write(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); return ret; } @@ -1673,7 +1681,7 @@ int ext4_delete_inline_entry(handle_t *handle, struct buffer_head *bh, int *has_inline_data) { - int err, inline_size; + int err, inline_size, no_expand; struct ext4_iloc iloc; void *inline_start; @@ -1681,7 +1689,7 @@ int ext4_delete_inline_entry(handle_t *handle, if (err) return err; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; @@ -1709,13 +1717,11 @@ int ext4_delete_inline_entry(handle_t *handle, if (err) goto out; - err = ext4_mark_inode_dirty(handle, dir); - if (unlikely(err)) - goto out; - ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); out: - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); + if (likely(err == 0)) + err = ext4_mark_inode_dirty(handle, dir); brelse(iloc.bh); if (err != -ENOENT) ext4_std_error(dir->i_sb, err); @@ -1814,11 +1820,11 @@ out: int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) { - int ret; + int ret, no_expand; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); ret = ext4_destroy_inline_data_nolock(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -1900,10 +1906,10 @@ out: return error; } -void ext4_inline_data_truncate(struct inode *inode, int *has_inline) +int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; - int inline_size, value_len, needed_blocks; + int inline_size, value_len, needed_blocks, no_expand, err = 0; size_t i_size; void *value = NULL; struct ext4_xattr_ibody_find is = { @@ -1918,19 +1924,19 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) needed_blocks = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); if (IS_ERR(handle)) - return; + return PTR_ERR(handle); - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { *has_inline = 0; ext4_journal_stop(handle); - return; + return 0; } - if (ext4_orphan_add(handle, inode)) + if ((err = ext4_orphan_add(handle, inode)) != 0) goto out; - if (ext4_get_inode_loc(inode, &is.iloc)) + if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) goto out; down_write(&EXT4_I(inode)->i_data_sem); @@ -1941,24 +1947,29 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) if (i_size < inline_size) { /* Clear the content in the xattr space. */ if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { - if (ext4_xattr_ibody_find(inode, &i, &is)) + if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) goto out_error; BUG_ON(is.s.not_found); value_len = le32_to_cpu(is.s.here->e_value_size); value = kmalloc(value_len, GFP_NOFS); - if (!value) + if (!value) { + err = -ENOMEM; goto out_error; + } - if (ext4_xattr_ibody_get(inode, i.name_index, i.name, - value, value_len)) + err = ext4_xattr_ibody_get(inode, i.name_index, + i.name, value, value_len); + if (err <= 0) goto out_error; i.value = value; i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; - if (ext4_xattr_ibody_inline_set(handle, inode, &i, &is)) + err = ext4_xattr_ibody_inline_set(handle, inode, + &i, &is); + if (err) goto out_error; } @@ -1978,23 +1989,24 @@ out_error: up_write(&EXT4_I(inode)->i_data_sem); out: brelse(is.iloc.bh); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); kfree(value); if (inode->i_nlink) ext4_orphan_del(handle, inode); - inode->i_mtime = inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); - if (IS_SYNC(inode)) - ext4_handle_sync(handle); - + if (err == 0) { + inode->i_mtime = inode->i_ctime = current_time(inode); + err = ext4_mark_inode_dirty(handle, inode); + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + } ext4_journal_stop(handle); - return; + return err; } int ext4_convert_inline_data(struct inode *inode) { - int error, needed_blocks; + int error, needed_blocks, no_expand; handle_t *handle; struct ext4_iloc iloc; @@ -2016,15 +2028,10 @@ int ext4_convert_inline_data(struct inode *inode) goto out_free; } - down_write(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - up_write(&EXT4_I(inode)->xattr_sem); - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); - up_write(&EXT4_I(inode)->xattr_sem); -out: + ext4_write_lock_xattr(inode, &no_expand); + if (ext4_has_inline_data(inode)) + error = ext4_convert_inline_data_nolock(handle, inode, &iloc); + ext4_write_unlock_xattr(inode, &no_expand); ext4_journal_stop(handle); out_free: brelse(iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 88d57af1b516..f622d4a577e3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1189,6 +1189,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + trace_ext4_write_begin(inode, pos, len, flags); /* * Reserve one block more for addition to orphan list in case @@ -1330,8 +1333,11 @@ static int ext4_write_end(struct file *file, if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); - if (ret < 0) + if (ret < 0) { + unlock_page(page); + put_page(page); goto errout; + } copied = ret; } else copied = block_write_end(file, mapping, pos, @@ -1385,7 +1391,9 @@ errout: * set the buffer to be dirty, since in data=journalled mode we need * to call ext4_handle_dirty_metadata() instead. */ -static void zero_new_buffers(struct page *page, unsigned from, unsigned to) +static void ext4_journalled_zero_new_buffers(handle_t *handle, + struct page *page, + unsigned from, unsigned to) { unsigned int block_start = 0, block_end; struct buffer_head *head, *bh; @@ -1402,7 +1410,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to) size = min(to, block_end) - start; zero_user(page, start, size); - set_buffer_uptodate(bh); + write_end_fn(handle, bh); } clear_buffer_new(bh); } @@ -1431,18 +1439,25 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) - copied = ext4_write_inline_data_end(inode, pos, len, - copied, page); - else { - if (copied < len) { - if (!PageUptodate(page)) - copied = 0; - zero_new_buffers(page, from+copied, to); + if (ext4_has_inline_data(inode)) { + ret = ext4_write_inline_data_end(inode, pos, len, + copied, page); + if (ret < 0) { + unlock_page(page); + put_page(page); + goto errout; } - + copied = ret; + } else if (unlikely(copied < len) && !PageUptodate(page)) { + copied = 0; + ext4_journalled_zero_new_buffers(handle, page, from, to); + } else { + if (unlikely(copied < len)) + ext4_journalled_zero_new_buffers(handle, page, + from + copied, to); ret = ext4_walk_page_buffers(handle, page_buffers(page), from, - to, &partial, write_end_fn); + from + copied, &partial, + write_end_fn); if (!partial) SetPageUptodate(page); } @@ -1468,6 +1483,7 @@ static int ext4_journalled_write_end(struct file *file, */ ext4_orphan_add(handle, inode); +errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; @@ -2034,6 +2050,12 @@ static int ext4_writepage(struct page *page, struct ext4_io_submit io_submit; bool keep_towrite = false; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { + ext4_invalidatepage(page, 0, PAGE_SIZE); + unlock_page(page); + return -EIO; + } + trace_ext4_writepage(page); size = i_size_read(inode); if (page->index == size >> PAGE_SHIFT) @@ -2409,7 +2431,8 @@ static int mpage_map_and_submit_extent(handle_t *handle, if (err < 0) { struct super_block *sb = inode->i_sb; - if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) + if (ext4_forced_shutdown(EXT4_SB(sb)) || + EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) goto invalidate_dirty_pages; /* * Let the uper layers retry transient errors. @@ -2464,8 +2487,8 @@ update_disksize: disksize = i_size; if (disksize > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = disksize; - err2 = ext4_mark_inode_dirty(handle, inode); up_write(&EXT4_I(inode)->i_data_sem); + err2 = ext4_mark_inode_dirty(handle, inode); if (err2) ext4_error(inode->i_sb, "Failed to mark inode %lu dirty", @@ -2631,6 +2654,9 @@ static int ext4_writepages(struct address_space *mapping, struct blk_plug plug; bool give_up_on_write = false; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + percpu_down_read(&sbi->s_journal_flag_rwsem); trace_ext4_writepages(inode, wbc); @@ -2667,7 +2693,8 @@ static int ext4_writepages(struct address_space *mapping, * *never* be called, so if that ever happens, we would want * the stack trace. */ - if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { + if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) || + sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { ret = -EROFS; goto out_writepages; } @@ -2892,6 +2919,9 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; handle_t *handle; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + index = pos >> PAGE_SHIFT; if (ext4_nonda_switch(inode->i_sb) || @@ -3914,6 +3944,10 @@ static int ext4_block_truncate_page(handle_t *handle, unsigned blocksize; struct inode *inode = mapping->host; + /* If we are processing an encrypted inode during orphan list handling */ + if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode)) + return 0; + blocksize = inode->i_sb->s_blocksize; length = blocksize - (offset & (blocksize - 1)); @@ -4222,7 +4256,9 @@ int ext4_truncate(struct inode *inode) if (ext4_has_inline_data(inode)) { int has_inline = 1; - ext4_inline_data_truncate(inode, &has_inline); + err = ext4_inline_data_truncate(inode, &has_inline); + if (err) + return err; if (has_inline) return 0; } @@ -5197,6 +5233,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) int orphan = 0; const unsigned int ia_valid = attr->ia_valid; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + error = setattr_prepare(dentry, attr); if (error) return error; @@ -5483,6 +5522,9 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + if (IS_I_VERSION(inode)) inode_inc_iversion(inode); @@ -5506,6 +5548,9 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, { int err; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + err = ext4_get_inode_loc(inode, iloc); if (!err) { BUFFER_TRACE(iloc->bh, "get_write_access"); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d534399cf607..a4273ddb9922 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -16,6 +16,7 @@ #include <linux/quotaops.h> #include <linux/uuid.h> #include <linux/uaccess.h> +#include <linux/delay.h> #include "ext4_jbd2.h" #include "ext4.h" @@ -442,6 +443,52 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) return iflags; } +int ext4_shutdown(struct super_block *sb, unsigned long arg) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + __u32 flags; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(flags, (__u32 __user *)arg)) + return -EFAULT; + + if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH) + return -EINVAL; + + if (ext4_forced_shutdown(sbi)) + return 0; + + ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags); + + switch (flags) { + case EXT4_GOING_FLAGS_DEFAULT: + freeze_bdev(sb->s_bdev); + set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); + thaw_bdev(sb->s_bdev, sb); + break; + case EXT4_GOING_FLAGS_LOGFLUSH: + set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); + if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) { + (void) ext4_force_commit(sb); + jbd2_journal_abort(sbi->s_journal, 0); + } + break; + case EXT4_GOING_FLAGS_NOLOGFLUSH: + set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); + if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) { + msleep(100); + jbd2_journal_abort(sbi->s_journal, 0); + } + break; + default: + return -EINVAL; + } + clear_opt(sb, DISCARD); + return 0; +} + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -893,6 +940,8 @@ resizefs_out: return 0; } + case EXT4_IOC_SHUTDOWN: + return ext4_shutdown(sb, arg); default: return -ENOTTY; } @@ -959,6 +1008,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC_SET_ENCRYPTION_POLICY: case EXT4_IOC_GET_ENCRYPTION_PWSALT: case EXT4_IOC_GET_ENCRYPTION_POLICY: + case EXT4_IOC_SHUTDOWN: break; default: return -ENOIOCTLCMD; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7ae43c59bc79..10c62de642c6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1556,7 +1556,17 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block, ex->fe_len += 1 << order; } - BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))); + if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) { + /* Should never happen! (but apparently sometimes does?!?) */ + WARN_ON(1); + ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent " + "block=%d, order=%d needed=%d ex=%u/%d/%d@%u", + block, order, needed, ex->fe_group, ex->fe_start, + ex->fe_len, ex->fe_logical); + ex->fe_len = 0; + ex->fe_start = 0; + ex->fe_group = 0; + } return ex->fe_len; } @@ -2136,8 +2146,10 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * We search using buddy data only if the order of the request * is greater than equal to the sbi_s_mb_order2_reqs * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req + * We also support searching for power-of-two requests only for + * requests upto maximum buddy size we have constructed. */ - if (i >= sbi->s_mb_order2_reqs) { + if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) { /* * This should tell if fe_len is exactly power of 2 */ @@ -2207,7 +2219,7 @@ repeat: } ac->ac_groups_scanned++; - if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2) + if (cr == 0) ext4_mb_simple_scan_group(ac, &e4b); else if (cr == 1 && sbi->s_stripe && !(ac->ac_g_ex.fe_len % sbi->s_stripe)) @@ -3123,6 +3135,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (ar->pright && start + size - 1 >= ar->lright) size -= start + size - ar->lright; + /* + * Trim allocation request for filesystems with artificially small + * groups. + */ + if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb); + end = start + size; /* check we don't cross already preallocated blocks */ diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index bb880c326191..6ad612c576fc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1618,13 +1618,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi !fscrypt_has_permitted_context(dir, inode)) { int nokey = ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode); - iput(inode); - if (nokey) + if (nokey) { + iput(inode); return ERR_PTR(-ENOKEY); + } ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu", (unsigned long) dir->i_ino, (unsigned long) inode->i_ino); + iput(inode); return ERR_PTR(-EPERM); } } @@ -2937,6 +2939,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) struct ext4_dir_entry_2 *de; handle_t *handle = NULL; + if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) + return -EIO; + /* Initialize quotas before so that eventual writes go in * separate transaction */ retval = dquot_initialize(dir); @@ -3010,6 +3015,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) struct ext4_dir_entry_2 *de; handle_t *handle = NULL; + if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) + return -EIO; + trace_ext4_unlink_enter(dir, dentry); /* Initialize quotas before so that eventual writes go * in separate transaction */ @@ -3080,6 +3088,9 @@ static int ext4_symlink(struct inode *dir, struct fscrypt_str disk_link; struct fscrypt_symlink_data *sd = NULL; + if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) + return -EIO; + disk_link.len = len + 1; disk_link.name = (char *) symname; @@ -3872,6 +3883,9 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { + if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb)))) + return -EIO; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index e55624c50898..208241b06662 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -157,7 +157,7 @@ static int ext4_end_io(ext4_io_end_t *io) io->handle = NULL; /* Following call will use up the handle */ ret = ext4_convert_unwritten_extents(handle, inode, offset, size); - if (ret < 0) { + if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) { ext4_msg(inode->i_sb, KERN_EMERG, "failed to convert unwritten extents to written " "extents -- potential data loss! " diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index cf681004b196..c3ed9021b781 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -45,7 +45,8 @@ int ext4_resize_begin(struct super_block *sb) return -EPERM; } - if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) + if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, + &EXT4_SB(sb)->s_ext4_flags)) ret = -EBUSY; return ret; @@ -53,7 +54,7 @@ int ext4_resize_begin(struct super_block *sb) void ext4_resize_end(struct super_block *sb) { - clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); + clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags); smp_mb__after_atomic(); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 20539c2cd6d8..2e03a0a88d92 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -438,6 +438,9 @@ void __ext4_error(struct super_block *sb, const char *function, struct va_format vaf; va_list args; + if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + return; + if (ext4_error_ratelimit(sb)) { va_start(args, fmt); vaf.fmt = fmt; @@ -459,6 +462,9 @@ void __ext4_error_inode(struct inode *inode, const char *function, struct va_format vaf; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return; + es->s_last_error_ino = cpu_to_le32(inode->i_ino); es->s_last_error_block = cpu_to_le64(block); if (ext4_error_ratelimit(inode->i_sb)) { @@ -491,6 +497,9 @@ void __ext4_error_file(struct file *file, const char *function, struct inode *inode = file_inode(file); char pathname[80], *path; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return; + es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); if (ext4_error_ratelimit(inode->i_sb)) { @@ -567,6 +576,9 @@ void __ext4_std_error(struct super_block *sb, const char *function, char nbuf[16]; const char *errstr; + if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + return; + /* Special case: if the error is EROFS, and we're not already * inside a transaction, then there's really no point in logging * an error. */ @@ -600,6 +612,9 @@ void __ext4_abort(struct super_block *sb, const char *function, struct va_format vaf; va_list args; + if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + return; + save_error_info(sb, function, line); va_start(args, fmt); vaf.fmt = fmt; @@ -695,6 +710,9 @@ __acquires(bitlock) va_list args; struct ext4_super_block *es = EXT4_SB(sb)->s_es; + if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + return; + es->s_last_error_ino = cpu_to_le32(ino); es->s_last_error_block = cpu_to_le64(block); __save_error_info(sb, function, line); @@ -825,6 +843,7 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + int aborted = 0; int i, err; ext4_unregister_li_request(sb); @@ -834,9 +853,10 @@ static void ext4_put_super(struct super_block *sb) destroy_workqueue(sbi->rsv_conversion_wq); if (sbi->s_journal) { + aborted = is_journal_aborted(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; - if (err < 0) + if ((err < 0) && !aborted) ext4_abort(sb, "Couldn't clean up the journal"); } @@ -847,7 +867,7 @@ static void ext4_put_super(struct super_block *sb) ext4_mb_release(sb); ext4_ext_release(sb); - if (!(sb->s_flags & MS_RDONLY)) { + if (!(sb->s_flags & MS_RDONLY) && !aborted) { ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } @@ -1284,7 +1304,7 @@ enum { Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, - Opt_lazytime, Opt_nolazytime, + Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, @@ -1352,6 +1372,7 @@ static const match_table_t tokens = { {Opt_delalloc, "delalloc"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, + {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"}, {Opt_nodelalloc, "nodelalloc"}, {Opt_removed, "mblk_io_submit"}, {Opt_removed, "nomblk_io_submit"}, @@ -1557,6 +1578,7 @@ static const struct mount_opts { #endif {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, + {Opt_debug_want_extra_isize, 0, MOPT_GTE0}, {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, @@ -1670,6 +1692,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, if (arg == 0) arg = JBD2_DEFAULT_MAX_COMMIT_AGE; sbi->s_commit_interval = HZ * arg; + } else if (token == Opt_debug_want_extra_isize) { + sbi->s_want_extra_isize = arg; } else if (token == Opt_max_batch_time) { sbi->s_max_batch_time = arg; } else if (token == Opt_min_batch_time) { @@ -2613,9 +2637,9 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) ret = sbi->s_stripe; - else if (stripe_width <= sbi->s_blocks_per_group) + else if (stripe_width && stripe_width <= sbi->s_blocks_per_group) ret = stripe_width; - else if (stride <= sbi->s_blocks_per_group) + else if (stride && stride <= sbi->s_blocks_per_group) ret = stride; else ret = 0; @@ -3836,7 +3860,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { - if (le32_to_cpu(es->s_first_meta_bg) >= db_count) { + if (le32_to_cpu(es->s_first_meta_bg) > db_count) { ext4_msg(sb, KERN_WARNING, "first meta block group too large: %u " "(group descriptor block count %u)", @@ -3919,7 +3943,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * root first: it may be modified in the journal! */ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { - if (ext4_load_journal(sb, es, journal_devnum)) + err = ext4_load_journal(sb, es, journal_devnum); + if (err) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && ext4_has_feature_journal_needs_recovery(sb)) { @@ -4081,7 +4106,8 @@ no_journal: sb->s_flags |= MS_RDONLY; /* determine the minimum size of new large inodes, if present */ - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && + sbi->s_want_extra_isize == 0) { sbi->s_want_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; if (ext4_has_feature_extra_isize(sb)) { @@ -4709,6 +4735,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait) bool needs_barrier = false; struct ext4_sb_info *sbi = EXT4_SB(sb); + if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) + return 0; + trace_ext4_sync_fs(sb, wait); flush_workqueue(sbi->rsv_conversion_wq); /* @@ -4792,7 +4821,7 @@ out: */ static int ext4_unfreeze(struct super_block *sb) { - if (sb->s_flags & MS_RDONLY) + if ((sb->s_flags & MS_RDONLY) || ext4_forced_shutdown(EXT4_SB(sb))) return 0; if (EXT4_SB(sb)->s_journal) { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 5a94fa52b74f..67636acf7624 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -411,6 +411,9 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, { int error; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + if (strlen(name) > 255) return -ERANGE; @@ -1188,16 +1191,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, struct ext4_xattr_block_find bs = { .s = { .not_found = -ENODATA, }, }; - unsigned long no_expand; + int no_expand; int error; if (!name) return -EINVAL; if (strlen(name) > 255) return -ERANGE; - down_write(&EXT4_I(inode)->xattr_sem); - no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_write_lock_xattr(inode, &no_expand); error = ext4_reserve_inode_write(handle, inode, &is.iloc); if (error) @@ -1264,7 +1265,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = current_time(inode); if (!value) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + no_expand = 0; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1278,9 +1279,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, cleanup: brelse(is.iloc.bh); brelse(bs.bh); - if (no_expand == 0) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return error; } @@ -1497,12 +1496,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ + int no_expand; + + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) + return 0; - down_write(&EXT4_I(inode)->xattr_sem); - /* - * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty - */ - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) @@ -1584,17 +1582,16 @@ shift: EXT4_I(inode)->i_extra_isize = new_extra_isize; brelse(bh); out: - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return 0; cleanup: brelse(bh); /* - * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode - * size expansion failed. + * Inode size expansion failed; don't try again */ - up_write(&EXT4_I(inode)->xattr_sem); + no_expand = 1; + ext4_write_unlock_xattr(inode, &no_expand); return error; } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index a92e783fa057..099c8b670ef5 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -102,6 +102,38 @@ extern const struct xattr_handler ext4_xattr_security_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" +/* + * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes. + * The first is to signal that there the inline xattrs and data are + * taking up so much space that we might as well not keep trying to + * expand it. The second is that xattr_sem is taken for writing, so + * we shouldn't try to recurse into the inode expansion. For this + * second case, we need to make sure that we take save and restore the + * NO_EXPAND state flag appropriately. + */ +static inline void ext4_write_lock_xattr(struct inode *inode, int *save) +{ + down_write(&EXT4_I(inode)->xattr_sem); + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); +} + +static inline int ext4_write_trylock_xattr(struct inode *inode, int *save) +{ + if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0) + return 0; + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + return 1; +} + +static inline void ext4_write_unlock_xattr(struct inode *inode, int *save) +{ + if (*save == 0) + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + up_write(&EXT4_I(inode)->xattr_sem); +} + extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); |