summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent-tree.c5
-rw-r--r--fs/btrfs/file.c1
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/locking.h1
-rw-r--r--fs/btrfs/qgroup.c10
-rw-r--r--fs/btrfs/transaction.c11
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/cifs/asn1.c53
-rw-r--r--fs/cifs/cifsfs.c24
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/file.c6
-rw-r--r--fs/cifs/inode.c10
-rw-r--r--fs/cifs/netmisc.c2
-rw-r--r--fs/ext4/ext4.h8
-rw-r--r--fs/ext4/extents.c105
-rw-r--r--fs/ext4/extents_status.c212
-rw-r--r--fs/ext4/extents_status.h9
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode.c182
-rw-r--r--fs/ext4/mballoc.c23
-rw-r--r--fs/ext4/move_extent.c43
-rw-r--r--fs/ext4/page-io.c12
-rw-r--r--fs/ext4/resize.c4
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/jbd2/transaction.c15
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/xfs/xfs_buf.c6
-rw-r--r--fs/xfs/xfs_iomap.c4
28 files changed, 619 insertions, 154 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3e074dab2d57..9ac2eca681eb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1467,8 +1467,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
if (ret && !insert) {
err = -ENOENT;
goto out;
+ } else if (ret) {
+ err = -EIO;
+ WARN_ON(1);
+ goto out;
}
- BUG_ON(ret); /* Corruption */
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index af1d0605a5c1..5b4ea5f55b8f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -591,6 +591,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
}
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+ clear_bit(EXTENT_FLAG_LOGGING, &flags);
remove_extent_mapping(em_tree, em);
if (no_splits)
goto next;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d1470adca8f8..ca1b767d51f7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2312,6 +2312,7 @@ again:
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = start;
+ path->leave_spinning = 1;
if (merge) {
struct btrfs_file_extent_item *fi;
u64 extent_len;
@@ -2368,6 +2369,7 @@ again:
btrfs_mark_buffer_dirty(leaf);
inode_add_bytes(inode, len);
+ btrfs_release_path(path);
ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
new->disk_len, 0,
@@ -2381,6 +2383,7 @@ again:
ret = 1;
out_free_path:
btrfs_release_path(path);
+ path->leave_spinning = 0;
btrfs_end_transaction(trans, root);
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index ca52681e5f40..b81e0e9a4894 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -26,7 +26,6 @@
void btrfs_tree_lock(struct extent_buffer *eb);
void btrfs_tree_unlock(struct extent_buffer *eb);
-int btrfs_try_spin_lock(struct extent_buffer *eb);
void btrfs_tree_read_lock(struct extent_buffer *eb);
void btrfs_tree_read_unlock(struct extent_buffer *eb);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index aee4b1cc3d98..5471e47d6559 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1525,21 +1525,23 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
qg->reserved + qg->rfer + num_bytes >
- qg->max_rfer)
+ qg->max_rfer) {
ret = -EDQUOT;
+ goto out;
+ }
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
qg->reserved + qg->excl + num_bytes >
- qg->max_excl)
+ qg->max_excl) {
ret = -EDQUOT;
+ goto out;
+ }
list_for_each_entry(glist, &qg->groups, next_group) {
ulist_add(ulist, glist->group->qgroupid,
(uintptr_t)glist->group, GFP_ATOMIC);
}
}
- if (ret)
- goto out;
/*
* no limits exceeded, now record the reservation into all qgroups
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9250b9c4f01e..50767bbaad6c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -625,14 +625,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
- /*
- * the same root has to be passed to start_transaction and
- * end_transaction. Subvolume quota depends on this.
- */
- WARN_ON(trans->root != root);
if (trans->qgroup_reserved) {
- btrfs_qgroup_free(root, trans->qgroup_reserved);
+ /*
+ * the same root has to be passed here between start_transaction
+ * and end_transaction. Subvolume quota depends on this.
+ */
+ btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6b9cff42265d..5989a92236f7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -684,6 +684,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
__btrfs_close_devices(fs_devices);
free_fs_devices(fs_devices);
}
+ /*
+ * Wait for rcu kworkers under __btrfs_close_devices
+ * to finish all blkdev_puts so device is really
+ * free when umount is done.
+ */
+ rcu_barrier();
return ret;
}
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index cfd1ce34e0bc..1d36db114772 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length,
}
}
- /* mechlistMIC */
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- /* Check if we have reached the end of the blob, but with
- no mechListMic (e.g. NTLMSSP instead of KRB5) */
- if (ctx.error == ASN1_ERR_DEC_EMPTY)
- goto decode_negtoken_exit;
- cFYI(1, "Error decoding last part negTokenInit exit3");
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
- /* tag = 3 indicating mechListMIC */
- cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end);
- return 0;
- }
-
- /* sequence */
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, "Error decoding last part negTokenInit exit5");
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
- || (tag != ASN1_SEQ)) {
- cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end);
- }
-
- /* sequence of */
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, "Error decoding last part negTokenInit exit 7");
- return 0;
- } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
- cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end);
- return 0;
- }
-
- /* general string */
- if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
- cFYI(1, "Error decoding last part negTokenInit exit9");
- return 0;
- } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
- || (tag != ASN1_GENSTR)) {
- cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)",
- cls, con, tag, end, *end);
- return 0;
- }
- cFYI(1, "Need to call asn1_octets_decode() function for %s",
- ctx.pointer); /* is this UTF-8 or ASCII? */
-decode_negtoken_exit:
+ /*
+ * We currently ignore anything at the end of the SPNEGO blob after
+ * the mechTypes have been parsed, since none of that info is
+ * used at the moment.
+ */
return 1;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3cf8a15af916..345fc89c4286 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -91,6 +91,30 @@ struct workqueue_struct *cifsiod_wq;
__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
#endif
+/*
+ * Bumps refcount for cifs super block.
+ * Note that it should be only called if a referece to VFS super block is
+ * already held, e.g. in open-type syscalls context. Otherwise it can race with
+ * atomic_dec_and_test in deactivate_locked_super.
+ */
+void
+cifs_sb_active(struct super_block *sb)
+{
+ struct cifs_sb_info *server = CIFS_SB(sb);
+
+ if (atomic_inc_return(&server->active) == 1)
+ atomic_inc(&sb->s_active);
+}
+
+void
+cifs_sb_deactive(struct super_block *sb)
+{
+ struct cifs_sb_info *server = CIFS_SB(sb);
+
+ if (atomic_dec_and_test(&server->active))
+ deactivate_super(sb);
+}
+
static int
cifs_read_super(struct super_block *sb)
{
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 7163419cecd9..0e32c3446ce9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -41,6 +41,10 @@ extern struct file_system_type cifs_fs_type;
extern const struct address_space_operations cifs_addr_ops;
extern const struct address_space_operations cifs_addr_ops_smallbuf;
+/* Functions related to super block operations */
+extern void cifs_sb_active(struct super_block *sb);
+extern void cifs_sb_deactive(struct super_block *sb);
+
/* Functions related to inodes */
extern const struct inode_operations cifs_dir_inode_ops;
extern struct inode *cifs_root_iget(struct super_block *);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8c0d85577314..7a0dd99e4507 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -300,6 +300,8 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
mutex_init(&cfile->fh_mutex);
+ cifs_sb_active(inode->i_sb);
+
/*
* If the server returned a read oplock and we have mandatory brlocks,
* set oplock level to None.
@@ -349,7 +351,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct super_block *sb = inode->i_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifsLockInfo *li, *tmp;
struct cifs_fid fid;
struct cifs_pending_open open;
@@ -414,6 +417,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
cifs_put_tlink(cifs_file->tlink);
dput(cifs_file->dentry);
+ cifs_sb_deactive(sb);
kfree(cifs_file);
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 0079696305c9..20887bf63121 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1043,7 +1043,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
if (rc != 0) {
- rc = -ETXTBSY;
+ rc = -EBUSY;
goto undo_setattr;
}
@@ -1062,7 +1062,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
if (rc == -ENOENT)
rc = 0;
else if (rc != 0) {
- rc = -ETXTBSY;
+ rc = -EBUSY;
goto undo_rename;
}
cifsInode->delete_pending = true;
@@ -1169,15 +1169,13 @@ psx_del_no_retry:
cifs_drop_nlink(inode);
} else if (rc == -ENOENT) {
d_drop(dentry);
- } else if (rc == -ETXTBSY) {
+ } else if (rc == -EBUSY) {
if (server->ops->rename_pending_delete) {
rc = server->ops->rename_pending_delete(full_path,
dentry, xid);
if (rc == 0)
cifs_drop_nlink(inode);
}
- if (rc == -ETXTBSY)
- rc = -EBUSY;
} else if ((rc == -EACCES) && (dosattr == 0) && inode) {
attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
if (attrs == NULL) {
@@ -1518,7 +1516,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
* source. Note that cross directory moves do not work with
* rename by filehandle to various Windows servers.
*/
- if (rc == 0 || rc != -ETXTBSY)
+ if (rc == 0 || rc != -EBUSY)
goto do_rename_exit;
/* open-file renames don't work across directories */
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index a82bc51fdc82..c0b25b28be6c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -62,7 +62,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
{ERRdiffdevice, -EXDEV},
{ERRnofiles, -ENOENT},
{ERRwriteprot, -EROFS},
- {ERRbadshare, -ETXTBSY},
+ {ERRbadshare, -EBUSY},
{ERRlock, -EACCES},
{ERRunsup, -EINVAL},
{ERRnosuchshare, -ENXIO},
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4a01ba315262..3b83cd604796 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -335,9 +335,9 @@ struct ext4_group_desc
*/
struct flex_groups {
- atomic_t free_inodes;
- atomic_t free_clusters;
- atomic_t used_dirs;
+ atomic64_t free_clusters;
+ atomic_t free_inodes;
+ atomic_t used_dirs;
};
#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
@@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
extern int __init ext4_init_pageio(void);
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
extern void ext4_exit_pageio(void);
-extern void ext4_ioend_wait(struct inode *);
+extern void ext4_ioend_shutdown(struct inode *);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern void ext4_end_io_work(struct work_struct *work);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 28dd8eeea6a9..56efcaadf848 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1584,10 +1584,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
unsigned short ext1_ee_len, ext2_ee_len, max_len;
/*
- * Make sure that either both extents are uninitialized, or
- * both are _not_.
+ * Make sure that both extents are initialized. We don't merge
+ * uninitialized extents so that we can be sure that end_io code has
+ * the extent that was written properly split out and conversion to
+ * initialized is trivial.
*/
- if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
+ if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
return 0;
if (ext4_ext_is_uninitialized(ex1))
@@ -2923,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle,
{
ext4_fsblk_t newblock;
ext4_lblk_t ee_block;
- struct ext4_extent *ex, newex, orig_ex;
+ struct ext4_extent *ex, newex, orig_ex, zero_ex;
struct ext4_extent *ex2 = NULL;
unsigned int ee_len, depth;
int err = 0;
@@ -2943,6 +2945,10 @@ static int ext4_split_extent_at(handle_t *handle,
newblock = split - ee_block + ext4_ext_pblock(ex);
BUG_ON(split < ee_block || split >= (ee_block + ee_len));
+ BUG_ON(!ext4_ext_is_uninitialized(ex) &&
+ split_flag & (EXT4_EXT_MAY_ZEROOUT |
+ EXT4_EXT_MARK_UNINIT1 |
+ EXT4_EXT_MARK_UNINIT2));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -2990,12 +2996,26 @@ static int ext4_split_extent_at(handle_t *handle,
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
- if (split_flag & EXT4_EXT_DATA_VALID1)
+ if (split_flag & EXT4_EXT_DATA_VALID1) {
err = ext4_ext_zeroout(inode, ex2);
- else
+ zero_ex.ee_block = ex2->ee_block;
+ zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
+ ext4_ext_store_pblock(&zero_ex,
+ ext4_ext_pblock(ex2));
+ } else {
err = ext4_ext_zeroout(inode, ex);
- } else
+ zero_ex.ee_block = ex->ee_block;
+ zero_ex.ee_len = ext4_ext_get_actual_len(ex);
+ ext4_ext_store_pblock(&zero_ex,
+ ext4_ext_pblock(ex));
+ }
+ } else {
err = ext4_ext_zeroout(inode, &orig_ex);
+ zero_ex.ee_block = orig_ex.ee_block;
+ zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
+ ext4_ext_store_pblock(&zero_ex,
+ ext4_ext_pblock(&orig_ex));
+ }
if (err)
goto fix_extent_len;
@@ -3003,6 +3023,12 @@ static int ext4_split_extent_at(handle_t *handle,
ex->ee_len = cpu_to_le16(ee_len);
ext4_ext_try_to_merge(handle, inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+ if (err)
+ goto fix_extent_len;
+
+ /* update extent status tree */
+ err = ext4_es_zeroout(inode, &zero_ex);
+
goto out;
} else if (err)
goto fix_extent_len;
@@ -3041,6 +3067,7 @@ static int ext4_split_extent(handle_t *handle,
int err = 0;
int uninitialized;
int split_flag1, flags1;
+ int allocated = map->m_len;
depth = ext_depth(inode);
ex = path[depth].p_ext;
@@ -3060,20 +3087,29 @@ static int ext4_split_extent(handle_t *handle,
map->m_lblk + map->m_len, split_flag1, flags1);
if (err)
goto out;
+ } else {
+ allocated = ee_len - (map->m_lblk - ee_block);
}
-
+ /*
+ * Update path is required because previous ext4_split_extent_at() may
+ * result in split of original leaf or extent zeroout.
+ */
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, map->m_lblk, path);
if (IS_ERR(path))
return PTR_ERR(path);
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ uninitialized = ext4_ext_is_uninitialized(ex);
+ split_flag1 = 0;
if (map->m_lblk >= ee_block) {
- split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
- EXT4_EXT_DATA_VALID2);
- if (uninitialized)
+ split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
+ if (uninitialized) {
split_flag1 |= EXT4_EXT_MARK_UNINIT1;
- if (split_flag & EXT4_EXT_MARK_UNINIT2)
- split_flag1 |= EXT4_EXT_MARK_UNINIT2;
+ split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
+ EXT4_EXT_MARK_UNINIT2);
+ }
err = ext4_split_extent_at(handle, inode, path,
map->m_lblk, split_flag1, flags);
if (err)
@@ -3082,7 +3118,7 @@ static int ext4_split_extent(handle_t *handle,
ext4_ext_show_leaf(inode, path);
out:
- return err ? err : map->m_len;
+ return err ? err : allocated;
}
/*
@@ -3137,6 +3173,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
+ zero_ex.ee_len = 0;
trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
@@ -3227,13 +3264,16 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (EXT4_EXT_MAY_ZEROOUT & split_flag)
max_zeroout = sbi->s_extent_max_zeroout_kb >>
- inode->i_sb->s_blocksize_bits;
+ (inode->i_sb->s_blocksize_bits - 10);
/* If extent is less than s_max_zeroout_kb, zeroout directly */
if (max_zeroout && (ee_len <= max_zeroout)) {
err = ext4_ext_zeroout(inode, ex);
if (err)
goto out;
+ zero_ex.ee_block = ex->ee_block;
+ zero_ex.ee_len = ext4_ext_get_actual_len(ex);
+ ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -3292,6 +3332,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = allocated;
out:
+ /* If we have gotten a failure, don't zero out status tree */
+ if (!err)
+ err = ext4_es_zeroout(inode, &zero_ex);
return err ? err : allocated;
}
@@ -3374,8 +3417,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
"block %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)ee_block, ee_len);
- /* If extent is larger than requested then split is required */
+ /* If extent is larger than requested it is a clear sign that we still
+ * have some extent state machine issues left. So extent_split is still
+ * required.
+ * TODO: Once all related issues will be fixed this situation should be
+ * illegal.
+ */
if (ee_block != map->m_lblk || ee_len > map->m_len) {
+#ifdef EXT4_DEBUG
+ ext4_warning("Inode (%ld) finished: extent logical block %llu,"
+ " len %u; IO logical block %llu, len %u\n",
+ inode->i_ino, (unsigned long long)ee_block, ee_len,
+ (unsigned long long)map->m_lblk, map->m_len);
+#endif
err = ext4_split_unwritten_extents(handle, inode, map, path,
EXT4_GET_BLOCKS_CONVERT);
if (err < 0)
@@ -3626,6 +3680,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
path, map->m_len);
} else
err = ret;
+ map->m_flags |= EXT4_MAP_MAPPED;
+ if (allocated > map->m_len)
+ allocated = map->m_len;
+ map->m_len = allocated;
goto out2;
}
/* buffered IO case */
@@ -3675,6 +3733,7 @@ out:
allocated - map->m_len);
allocated = map->m_len;
}
+ map->m_len = allocated;
/*
* If we have done fallocate with the offset that is already
@@ -4106,9 +4165,6 @@ got_allocated_blocks:
}
} else {
BUG_ON(allocated_clusters < reserved_clusters);
- /* We will claim quota for all newly allocated blocks.*/
- ext4_da_update_reserve_space(inode, allocated_clusters,
- 1);
if (reserved_clusters < allocated_clusters) {
struct ext4_inode_info *ei = EXT4_I(inode);
int reservation = allocated_clusters -
@@ -4159,6 +4215,15 @@ got_allocated_blocks:
ei->i_reserved_data_blocks += reservation;
spin_unlock(&ei->i_block_reservation_lock);
}
+ /*
+ * We will claim quota for all newly allocated blocks.
+ * We're updating the reserved space *after* the
+ * correction above so we do not accidentally free
+ * all the metadata reservation because we might
+ * actually need it later on.
+ */
+ ext4_da_update_reserve_space(inode, allocated_clusters,
+ 1);
}
}
@@ -4368,8 +4433,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (len <= EXT_UNINIT_MAX_LEN << blkbits)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
- /* Prevent race condition between unwritten */
- ext4_flush_unwritten_io(inode);
retry:
while (ret >= 0 && ret < max_blocks) {
map.m_lblk = map.m_lblk + ret;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 95796a1b7522..fe3337a85ede 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -333,17 +333,27 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
static int ext4_es_can_be_merged(struct extent_status *es1,
struct extent_status *es2)
{
- if (es1->es_lblk + es1->es_len != es2->es_lblk)
+ if (ext4_es_status(es1) != ext4_es_status(es2))
return 0;
- if (ext4_es_status(es1) != ext4_es_status(es2))
+ if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
return 0;
- if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
- (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
+ if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
return 0;
- return 1;
+ if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
+ (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
+ return 1;
+
+ if (ext4_es_is_hole(es1))
+ return 1;
+
+ /* we need to check delayed extent is without unwritten status */
+ if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
+ return 1;
+
+ return 0;
}
static struct extent_status *
@@ -389,6 +399,179 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
return es;
}
+#ifdef ES_AGGRESSIVE_TEST
+static void ext4_es_insert_extent_ext_check(struct inode *inode,
+ struct extent_status *es)
+{
+ struct ext4_ext_path *path = NULL;
+ struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+ ext4_fsblk_t ee_start;
+ unsigned short ee_len;
+ int depth, ee_status, es_status;
+
+ path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
+ if (IS_ERR(path))
+ return;
+
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+
+ if (ex) {
+
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_start = ext4_ext_pblock(ex);
+ ee_len = ext4_ext_get_actual_len(ex);
+
+ ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
+ es_status = ext4_es_is_unwritten(es) ? 1 : 0;
+
+ /*
+ * Make sure ex and es are not overlap when we try to insert
+ * a delayed/hole extent.
+ */
+ if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
+ if (in_range(es->es_lblk, ee_block, ee_len)) {
+ pr_warn("ES insert assertation failed for "
+ "inode: %lu we can find an extent "
+ "at block [%d/%d/%llu/%c], but we "
+ "want to add an delayed/hole extent "
+ "[%d/%d/%llu/%llx]\n",
+ inode->i_ino, ee_block, ee_len,
+ ee_start, ee_status ? 'u' : 'w',
+ es->es_lblk, es->es_len,
+ ext4_es_pblock(es), ext4_es_status(es));
+ }
+ goto out;
+ }
+
+ /*
+ * We don't check ee_block == es->es_lblk, etc. because es
+ * might be a part of whole extent, vice versa.
+ */
+ if (es->es_lblk < ee_block ||
+ ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
+ pr_warn("ES insert assertation failed for inode: %lu "
+ "ex_status [%d/%d/%llu/%c] != "
+ "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
+ ee_block, ee_len, ee_start,
+ ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
+ ext4_es_pblock(es), es_status ? 'u' : 'w');
+ goto out;
+ }
+
+ if (ee_status ^ es_status) {
+ pr_warn("ES insert assertation failed for inode: %lu "
+ "ex_status [%d/%d/%llu/%c] != "
+ "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
+ ee_block, ee_len, ee_start,
+ ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
+ ext4_es_pblock(es), es_status ? 'u' : 'w');
+ }
+ } else {
+ /*
+ * We can't find an extent on disk. So we need to make sure
+ * that we don't want to add an written/unwritten extent.
+ */
+ if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
+ pr_warn("ES insert assertation failed for inode: %lu "
+ "can't find an extent at block %d but we want "
+ "to add an written/unwritten extent "
+ "[%d/%d/%llu/%llx]\n", inode->i_ino,
+ es->es_lblk, es->es_lblk, es->es_len,
+ ext4_es_pblock(es), ext4_es_status(es));
+ }
+ }
+out:
+ if (path) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+}
+
+static void ext4_es_insert_extent_ind_check(struct inode *inode,
+ struct extent_status *es)
+{
+ struct ext4_map_blocks map;
+ int retval;
+
+ /*
+ * Here we call ext4_ind_map_blocks to lookup a block mapping because
+ * 'Indirect' structure is defined in indirect.c. So we couldn't
+ * access direct/indirect tree from outside. It is too dirty to define
+ * this function in indirect.c file.
+ */
+
+ map.m_lblk = es->es_lblk;
+ map.m_len = es->es_len;
+
+ retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
+ if (retval > 0) {
+ if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
+ /*
+ * We want to add a delayed/hole extent but this
+ * block has been allocated.
+ */
+ pr_warn("ES insert assertation failed for inode: %lu "
+ "We can find blocks but we want to add a "
+ "delayed/hole extent [%d/%d/%llu/%llx]\n",
+ inode->i_ino, es->es_lblk, es->es_len,
+ ext4_es_pblock(es), ext4_es_status(es));
+ return;
+ } else if (ext4_es_is_written(es)) {
+ if (retval != es->es_len) {
+ pr_warn("ES insert assertation failed for "
+ "inode: %lu retval %d != es_len %d\n",
+ inode->i_ino, retval, es->es_len);
+ return;
+ }
+ if (map.m_pblk != ext4_es_pblock(es)) {
+ pr_warn("ES insert assertation failed for "
+ "inode: %lu m_pblk %llu != "
+ "es_pblk %llu\n",
+ inode->i_ino, map.m_pblk,
+ ext4_es_pblock(es));
+ return;
+ }
+ } else {
+ /*
+ * We don't need to check unwritten extent because
+ * indirect-based file doesn't have it.
+ */
+ BUG_ON(1);
+ }
+ } else if (retval == 0) {
+ if (ext4_es_is_written(es)) {
+ pr_warn("ES insert assertation failed for inode: %lu "
+ "We can't find the block but we want to add "
+ "an written extent [%d/%d/%llu/%llx]\n",
+ inode->i_ino, es->es_lblk, es->es_len,
+ ext4_es_pblock(es), ext4_es_status(es));
+ return;
+ }
+ }
+}
+
+static inline void ext4_es_insert_extent_check(struct inode *inode,
+ struct extent_status *es)
+{
+ /*
+ * We don't need to worry about the race condition because
+ * caller takes i_data_sem locking.
+ */
+ BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+ ext4_es_insert_extent_ext_check(inode, es);
+ else
+ ext4_es_insert_extent_ind_check(inode, es);
+}
+#else
+static inline void ext4_es_insert_extent_check(struct inode *inode,
+ struct extent_status *es)
+{
+}
+#endif
+
static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
{
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
@@ -471,6 +654,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_es_store_status(&newes, status);
trace_ext4_es_insert_extent(inode, &newes);
+ ext4_es_insert_extent_check(inode, &newes);
+
write_lock(&EXT4_I(inode)->i_es_lock);
err = __es_remove_extent(inode, lblk, end);
if (err != 0)
@@ -669,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
return err;
}
+int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
+{
+ ext4_lblk_t ee_block;
+ ext4_fsblk_t ee_pblock;
+ unsigned int ee_len;
+
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+ ee_pblock = ext4_ext_pblock(ex);
+
+ if (ee_len == 0)
+ return 0;
+
+ return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
+ EXTENT_STATUS_WRITTEN);
+}
+
static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
struct ext4_sb_info *sbi = container_of(shrink,
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f190dfe969da..d8e2d4dc311e 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -21,6 +21,12 @@
#endif
/*
+ * With ES_AGGRESSIVE_TEST defined, the result of es caching will be
+ * checked with old map_block's result.
+ */
+#define ES_AGGRESSIVE_TEST__
+
+/*
* These flags live in the high bits of extent_status.es_pblk
*/
#define EXTENT_STATUS_WRITTEN (1ULL << 63)
@@ -33,6 +39,8 @@
EXTENT_STATUS_DELAYED | \
EXTENT_STATUS_HOLE)
+struct ext4_extent;
+
struct extent_status {
struct rb_node rb_node;
ext4_lblk_t es_lblk; /* first logical block extent covers */
@@ -58,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
+extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
static inline int ext4_es_is_written(struct extent_status *es)
{
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 32fd2b9075dd..6c5bb8d993fe 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -324,8 +324,8 @@ error_return:
}
struct orlov_stats {
+ __u64 free_clusters;
__u32 free_inodes;
- __u32 free_clusters;
__u32 used_dirs;
};
@@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
if (flex_size > 1) {
stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
- stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
+ stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
return;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9ea0cde3fa9e..b3a5213bc73e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode)
trace_ext4_evict_inode(inode);
- ext4_ioend_wait(inode);
-
if (inode->i_nlink) {
/*
* When journalling data dirty buffers are tracked only in the
@@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode)
* don't use page cache.
*/
if (ext4_should_journal_data(inode) &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+ inode->i_ino != EXT4_JOURNAL_INO) {
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
@@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode)
filemap_write_and_wait(&inode->i_data);
}
truncate_inode_pages(&inode->i_data, 0);
+ ext4_ioend_shutdown(inode);
goto no_delete;
}
@@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode)
if (ext4_should_order_data(inode))
ext4_begin_ordered_truncate(inode, 0);
truncate_inode_pages(&inode->i_data, 0);
+ ext4_ioend_shutdown(inode);
if (is_bad_inode(inode))
goto no_delete;
@@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
return num;
}
+#ifdef ES_AGGRESSIVE_TEST
+static void ext4_map_blocks_es_recheck(handle_t *handle,
+ struct inode *inode,
+ struct ext4_map_blocks *es_map,
+ struct ext4_map_blocks *map,
+ int flags)
+{
+ int retval;
+
+ map->m_flags = 0;
+ /*
+ * There is a race window that the result is not the same.
+ * e.g. xfstests #223 when dioread_nolock enables. The reason
+ * is that we lookup a block mapping in extent status tree with
+ * out taking i_data_sem. So at the time the unwritten extent
+ * could be converted.
+ */
+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+ down_read((&EXT4_I(inode)->i_data_sem));
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ retval = ext4_ext_map_blocks(handle, inode, map, flags &
+ EXT4_GET_BLOCKS_KEEP_SIZE);
+ } else {
+ retval = ext4_ind_map_blocks(handle, inode, map, flags &
+ EXT4_GET_BLOCKS_KEEP_SIZE);
+ }
+ if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+ up_read((&EXT4_I(inode)->i_data_sem));
+ /*
+ * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
+ * because it shouldn't be marked in es_map->m_flags.
+ */
+ map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
+
+ /*
+ * We don't check m_len because extent will be collpased in status
+ * tree. So the m_len might not equal.
+ */
+ if (es_map->m_lblk != map->m_lblk ||
+ es_map->m_flags != map->m_flags ||
+ es_map->m_pblk != map->m_pblk) {
+ printk("ES cache assertation failed for inode: %lu "
+ "es_cached ex [%d/%d/%llu/%x] != "
+ "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
+ inode->i_ino, es_map->m_lblk, es_map->m_len,
+ es_map->m_pblk, es_map->m_flags, map->m_lblk,
+ map->m_len, map->m_pblk, map->m_flags,
+ retval, flags);
+ }
+}
+#endif /* ES_AGGRESSIVE_TEST */
+
/*
* The ext4_map_blocks() function tries to look up the requested blocks,
* and returns if the blocks are already mapped.
@@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
{
struct extent_status es;
int retval;
+#ifdef ES_AGGRESSIVE_TEST
+ struct ext4_map_blocks orig_map;
+
+ memcpy(&orig_map, map, sizeof(*map));
+#endif
map->m_flags = 0;
ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
@@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
} else {
BUG_ON(1);
}
+#ifdef ES_AGGRESSIVE_TEST
+ ext4_map_blocks_es_recheck(handle, inode, map,
+ &orig_map, flags);
+#endif
goto found;
}
@@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
int ret;
unsigned long long status;
+#ifdef ES_AGGRESSIVE_TEST
+ if (retval != map->m_len) {
+ printk("ES len assertation failed for inode: %lu "
+ "retval %d != map->m_len %d "
+ "in %s (lookup)\n", inode->i_ino, retval,
+ map->m_len, __func__);
+ }
+#endif
+
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -643,6 +714,24 @@ found:
int ret;
unsigned long long status;
+#ifdef ES_AGGRESSIVE_TEST
+ if (retval != map->m_len) {
+ printk("ES len assertation failed for inode: %lu "
+ "retval %d != map->m_len %d "
+ "in %s (allocation)\n", inode->i_ino, retval,
+ map->m_len, __func__);
+ }
+#endif
+
+ /*
+ * If the extent has been zeroed out, we don't need to update
+ * extent status tree.
+ */
+ if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
+ ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+ if (ext4_es_is_written(&es))
+ goto has_zeroout;
+ }
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -655,6 +744,7 @@ found:
retval = ret;
}
+has_zeroout:
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
int ret = check_block_validity(inode, map);
@@ -1216,6 +1306,55 @@ static int ext4_journalled_write_end(struct file *file,
}
/*
+ * Reserve a metadata for a single block located at lblock
+ */
+static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
+{
+ int retries = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned int md_needed;
+ ext4_lblk_t save_last_lblock;
+ int save_len;
+
+ /*
+ * recalculate the amount of metadata blocks to reserve
+ * in order to allocate nrblocks
+ * worse case is one extent per block
+ */
+repeat:
+ spin_lock(&ei->i_block_reservation_lock);
+ /*
+ * ext4_calc_metadata_amount() has side effects, which we have
+ * to be prepared undo if we fail to claim space.
+ */
+ save_len = ei->i_da_metadata_calc_len;
+ save_last_lblock = ei->i_da_metadata_calc_last_lblock;
+ md_needed = EXT4_NUM_B2C(sbi,
+ ext4_calc_metadata_amount(inode, lblock));
+ trace_ext4_da_reserve_space(inode, md_needed);
+
+ /*
+ * We do still charge estimated metadata to the sb though;
+ * we cannot afford to run out of free blocks.
+ */
+ if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
+ ei->i_da_metadata_calc_len = save_len;
+ ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+ spin_unlock(&ei->i_block_reservation_lock);
+ if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ cond_resched();
+ goto repeat;
+ }
+ return -ENOSPC;
+ }
+ ei->i_reserved_meta_blocks += md_needed;
+ spin_unlock(&ei->i_block_reservation_lock);
+
+ return 0; /* success */
+}
+
+/*
* Reserve a single cluster located at lblock
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1263,7 +1402,7 @@ repeat:
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- yield();
+ cond_resched();
goto repeat;
}
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
@@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
struct extent_status es;
int retval;
sector_t invalid_block = ~((sector_t) 0xffff);
+#ifdef ES_AGGRESSIVE_TEST
+ struct ext4_map_blocks orig_map;
+
+ memcpy(&orig_map, map, sizeof(*map));
+#endif
if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
invalid_block = ~0;
@@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
else
BUG_ON(1);
+#ifdef ES_AGGRESSIVE_TEST
+ ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
+#endif
return retval;
}
@@ -1843,8 +1990,11 @@ add_delayed:
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
- /* If the block was allocated from previously allocated cluster,
- * then we dont need to reserve it again. */
+ /*
+ * If the block was allocated from previously allocated cluster,
+ * then we don't need to reserve it again. However we still need
+ * to reserve metadata for every block we're going to write.
+ */
if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
ret = ext4_da_reserve_space(inode, iblock);
if (ret) {
@@ -1852,6 +2002,13 @@ add_delayed:
retval = ret;
goto out_unlock;
}
+ } else {
+ ret = ext4_da_reserve_metadata(inode, iblock);
+ if (ret) {
+ /* not enough space to reserve */
+ retval = ret;
+ goto out_unlock;
+ }
}
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -1873,6 +2030,15 @@ add_delayed:
int ret;
unsigned long long status;
+#ifdef ES_AGGRESSIVE_TEST
+ if (retval != map->m_len) {
+ printk("ES len assertation failed for inode: %lu "
+ "retval %d != map->m_len %d "
+ "in %s (lookup)\n", inode->i_ino, retval,
+ map->m_len, __func__);
+ }
+#endif
+
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
trace_ext4_releasepage(page);
- WARN_ON(PageChecked(page));
- if (!page_has_buffers(page))
+ /* Page has dirty journalled data -> cannot release */
+ if (PageChecked(page))
return 0;
if (journal)
return jbd2_journal_try_to_free_buffers(journal, page, wait);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7bb713a46fe4..ee6614bdb639 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2804,8 +2804,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
ac->ac_b_ex.fe_group);
- atomic_sub(ac->ac_b_ex.fe_len,
- &sbi->s_flex_groups[flex_group].free_clusters);
+ atomic64_sub(ac->ac_b_ex.fe_len,
+ &sbi->s_flex_groups[flex_group].free_clusters);
}
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -3692,11 +3692,7 @@ repeat:
if (free < needed && busy) {
busy = 0;
ext4_unlock_group(sb, group);
- /*
- * Yield the CPU here so that we don't get soft lockup
- * in non preempt case.
- */
- yield();
+ cond_resched();
goto repeat;
}
@@ -4246,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
/* let others to free the space */
- yield();
+ cond_resched();
ar->len = ar->len >> 1;
}
if (!ar->len) {
@@ -4464,7 +4460,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
struct ext4_group_desc *gdp;
- unsigned long freed = 0;
unsigned int overflow;
ext4_grpblk_t bit;
struct buffer_head *gd_bh;
@@ -4666,14 +4661,12 @@ do_more:
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
- atomic_add(count_clusters,
- &sbi->s_flex_groups[flex_group].free_clusters);
+ atomic64_add(count_clusters,
+ &sbi->s_flex_groups[flex_group].free_clusters);
}
ext4_mb_unload_buddy(&e4b);
- freed += count;
-
if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
@@ -4811,8 +4804,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
- atomic_add(EXT4_NUM_B2C(sbi, blocks_freed),
- &sbi->s_flex_groups[flex_group].free_clusters);
+ atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
+ &sbi->s_flex_groups[flex_group].free_clusters);
}
ext4_mb_unload_buddy(&e4b);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 4e81d47aa8cb..33e1c086858b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -32,16 +32,18 @@
*/
static inline int
get_ext_path(struct inode *inode, ext4_lblk_t lblock,
- struct ext4_ext_path **path)
+ struct ext4_ext_path **orig_path)
{
int ret = 0;
+ struct ext4_ext_path *path;
- *path = ext4_ext_find_extent(inode, lblock, *path);
- if (IS_ERR(*path)) {
- ret = PTR_ERR(*path);
- *path = NULL;
- } else if ((*path)[ext_depth(inode)].p_ext == NULL)
+ path = ext4_ext_find_extent(inode, lblock, *orig_path);
+ if (IS_ERR(path))
+ ret = PTR_ERR(path);
+ else if (path[ext_depth(inode)].p_ext == NULL)
ret = -ENODATA;
+ else
+ *orig_path = path;
return ret;
}
@@ -611,24 +613,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
{
struct ext4_ext_path *path = NULL;
struct ext4_extent *ext;
+ int ret = 0;
ext4_lblk_t last = from + count;
while (from < last) {
*err = get_ext_path(inode, from, &path);
if (*err)
- return 0;
+ goto out;
ext = path[ext_depth(inode)].p_ext;
- if (!ext) {
- ext4_ext_drop_refs(path);
- return 0;
- }
- if (uninit != ext4_ext_is_uninitialized(ext)) {
- ext4_ext_drop_refs(path);
- return 0;
- }
+ if (uninit != ext4_ext_is_uninitialized(ext))
+ goto out;
from += ext4_ext_get_actual_len(ext);
ext4_ext_drop_refs(path);
}
- return 1;
+ ret = 1;
+out:
+ if (path) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+ return ret;
}
/**
@@ -666,6 +669,14 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
int replaced_count = 0;
int dext_alen;
+ *err = ext4_es_remove_extent(orig_inode, from, count);
+ if (*err)
+ goto out;
+
+ *err = ext4_es_remove_extent(donor_inode, from, count);
+ if (*err)
+ goto out;
+
/* Get the original extent for the block "orig_off" */
*err = get_ext_path(orig_inode, orig_off, &orig_path);
if (*err)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 809b31003ecc..047a6de04a0a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -50,11 +50,21 @@ void ext4_exit_pageio(void)
kmem_cache_destroy(io_page_cachep);
}
-void ext4_ioend_wait(struct inode *inode)
+/*
+ * This function is called by ext4_evict_inode() to make sure there is
+ * no more pending I/O completion work left to do.
+ */
+void ext4_ioend_shutdown(struct inode *inode)
{
wait_queue_head_t *wq = ext4_ioend_wq(inode);
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+ /*
+ * We need to make sure the work structure is finished being
+ * used before we let the inode get destroyed.
+ */
+ if (work_pending(&EXT4_I(inode)->i_unwritten_work))
+ cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
}
static void put_io_page(struct ext4_io_page *io_page)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b2c8ee56eb98..c169477a62c9 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1360,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb,
sbi->s_log_groups_per_flex) {
ext4_group_t flex_group;
flex_group = ext4_flex_group(sbi, group_data[0].group);
- atomic_add(EXT4_NUM_B2C(sbi, free_blocks),
- &sbi->s_flex_groups[flex_group].free_clusters);
+ atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
+ &sbi->s_flex_groups[flex_group].free_clusters);
atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
&sbi->s_flex_groups[flex_group].free_inodes);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b3818b48f418..5d6d53578124 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1927,8 +1927,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
flex_group = ext4_flex_group(sbi, i);
atomic_add(ext4_free_inodes_count(sb, gdp),
&sbi->s_flex_groups[flex_group].free_inodes);
- atomic_add(ext4_free_group_clusters(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_clusters);
+ atomic64_add(ext4_free_group_clusters(sb, gdp),
+ &sbi->s_flex_groups[flex_group].free_clusters);
atomic_add(ext4_used_dirs_count(sb, gdp),
&sbi->s_flex_groups[flex_group].used_dirs);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d6ee5aed56b1..325bc019ed88 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1065,9 +1065,12 @@ out:
void jbd2_journal_set_triggers(struct buffer_head *bh,
struct jbd2_buffer_trigger_type *type)
{
- struct journal_head *jh = bh2jh(bh);
+ struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
+ if (WARN_ON(!jh))
+ return;
jh->b_triggers = type;
+ jbd2_journal_put_journal_head(jh);
}
void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
@@ -1119,17 +1122,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
- struct journal_head *jh = bh2jh(bh);
+ struct journal_head *jh;
int ret = 0;
- jbd_debug(5, "journal_head %p\n", jh);
- JBUFFER_TRACE(jh, "entry");
if (is_handle_aborted(handle))
goto out;
- if (!buffer_jbd(bh)) {
+ jh = jbd2_journal_grab_journal_head(bh);
+ if (!jh) {
ret = -EUCLEAN;
goto out;
}
+ jbd_debug(5, "journal_head %p\n", jh);
+ JBUFFER_TRACE(jh, "entry");
jbd_lock_bh_state(bh);
@@ -1220,6 +1224,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
spin_unlock(&journal->j_list_lock);
out_unlock_bh:
jbd_unlock_bh_state(bh);
+ jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
WARN_ON(ret); /* All errors are bugs, so dump the stack */
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a86aebc9ba7c..869116c2afbe 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -446,9 +446,10 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
- struct inode *inode = iget_locked(sb, de->low_ino);
+ struct inode *inode = new_inode_pseudo(sb);
- if (inode && (inode->i_state & I_NEW)) {
+ if (inode) {
+ inode->i_ino = de->low_ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
PROC_I(inode)->pde = de;
@@ -476,7 +477,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
inode->i_fop = de->proc_fops;
}
}
- unlock_new_inode(inode);
} else
pde_put(de);
return inode;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4e8f0df82d02..8459b5d8cb71 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1334,6 +1334,12 @@ _xfs_buf_ioapply(
int size;
int i;
+ /*
+ * Make sure we capture only current IO errors rather than stale errors
+ * left over from previous use of the buffer (e.g. failed readahead).
+ */
+ bp->b_error = 0;
+
if (bp->b_flags & XBF_WRITE) {
if (bp->b_flags & XBF_SYNCIO)
rw = WRITE_SYNC;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 912d83d8860a..5a30dd899d2b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -325,7 +325,7 @@ xfs_iomap_eof_want_preallocate(
* rather than falling short due to things like stripe unit/width alignment of
* real extents.
*/
-STATIC int
+STATIC xfs_fsblock_t
xfs_iomap_eof_prealloc_initial_size(
struct xfs_mount *mp,
struct xfs_inode *ip,
@@ -413,7 +413,7 @@ xfs_iomap_prealloc_size(
* have a large file on a small filesystem and the above
* lowspace thresholds are smaller than MAXEXTLEN.
*/
- while (alloc_blocks >= freesp)
+ while (alloc_blocks && alloc_blocks >= freesp)
alloc_blocks >>= 4;
}