diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2021-10-11 12:03:19 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:15 -0400 |
commit | 2027875bd8318171159495c948461eae2f84936d (patch) | |
tree | 891d2abcd3f50b2bd3bf9b3b60cd325aea5fad66 | |
parent | f3b1e1937973624d3bc5f3ba0824e228ae256b88 (diff) |
bcachefs: Add BCH_SUBVOLUME_UNLINKED
Snapshot deletion needs to become a multi-step process: first we unlink
the subvolume, then tear down the page cache, then delete the subvolume.
The new BCH_SUBVOLUME_UNLINKED flag is equivalent to an inode with i_nlink = 0.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/bcachefs.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/fs-common.c | 30 | ||||
-rw-r--r-- | fs/bcachefs/fs-common.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/fs-ioctl.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 11 | ||||
-rw-r--r-- | fs/bcachefs/fs.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/fsck.c | 18 | ||||
-rw-r--r-- | fs/bcachefs/inode.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/subvolume.c | 182 | ||||
-rw-r--r-- | fs/bcachefs/subvolume.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/subvolume_types.h | 11 |
12 files changed, 223 insertions, 51 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 1608faae0d0b..567270015008 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -353,6 +353,7 @@ enum bch_time_stats { #include "quota_types.h" #include "rebalance_types.h" #include "replicas_types.h" +#include "subvolume_types.h" #include "super_types.h" /* Number of nodes btree coalesce will try to coalesce at once */ @@ -657,6 +658,9 @@ struct bch_fs { struct bch_snapshot_table __rcu *snapshot_table; struct mutex snapshot_table_lock; struct work_struct snapshot_delete_work; + struct work_struct snapshot_wait_for_pagecache_and_delete_work; + struct snapshot_id_list snapshots_unlinked; + struct mutex snapshots_unlinked_lock; /* BTREE CACHE */ struct bio_set btree_bio; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 481bf643bd6f..8e1423b138a6 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -974,6 +974,7 @@ LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) * can delete it (or whether it should just be rm -rf'd) */ LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) +LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) /* Snapshots */ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index c49de741e1e3..5f3429e99115 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -239,7 +239,7 @@ int bch2_unlink_trans(struct btree_trans *trans, struct bch_inode_unpacked *dir_u, struct bch_inode_unpacked *inode_u, const struct qstr *name, - int deleting_snapshot) + bool deleting_snapshot) { struct bch_fs *c = trans->c; struct btree_iter dir_iter = { NULL }; @@ -267,35 +267,19 @@ int bch2_unlink_trans(struct btree_trans *trans, if (ret) goto err; - if (deleting_snapshot <= 0 && S_ISDIR(inode_u->bi_mode)) { + if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) { ret = bch2_empty_dir_trans(trans, inum); if (ret) goto err; } - if (deleting_snapshot < 0 && - 
inode_u->bi_subvol) { - struct bch_subvolume s; - - ret = bch2_subvolume_get(trans, inode_u->bi_subvol, true, - BTREE_ITER_CACHED| - BTREE_ITER_WITH_UPDATES, - &s); - if (ret) - goto err; - - if (BCH_SUBVOLUME_SNAP(&s)) - deleting_snapshot = 1; + if (deleting_snapshot && !inode_u->bi_subvol) { + ret = -ENOENT; + goto err; } - if (deleting_snapshot == 1) { - if (!inode_u->bi_subvol) { - ret = -ENOENT; - goto err; - } - - ret = bch2_subvolume_delete(trans, inode_u->bi_subvol, - deleting_snapshot); + if (deleting_snapshot || inode_u->bi_subvol) { + ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol); if (ret) goto err; diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h index 9bb0a9676147..dde237859514 100644 --- a/fs/bcachefs/fs-common.h +++ b/fs/bcachefs/fs-common.h @@ -26,7 +26,7 @@ int bch2_link_trans(struct btree_trans *, int bch2_unlink_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_inode_unpacked *, - const struct qstr *, int); + const struct qstr *, bool); int bch2_rename_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index a12b591ec9ca..de94895ace9f 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -441,7 +441,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, dir = path.dentry->d_parent->d_inode; - ret = __bch2_unlink(dir, path.dentry, 1); + ret = __bch2_unlink(dir, path.dentry, true); if (!ret) { fsnotify_rmdir(dir, path.dentry); d_delete(path.dentry); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 334cd335ff11..c325e5c4325c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -490,7 +490,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, } int __bch2_unlink(struct inode *vdir, struct dentry *dentry, - int deleting_snapshot) + bool deleting_snapshot) { struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); @@ 
-527,7 +527,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, static int bch2_unlink(struct inode *vdir, struct dentry *dentry) { - return __bch2_unlink(vdir, dentry, -1); + return __bch2_unlink(vdir, dentry, false); } static int bch2_symlink(struct mnt_idmap *idmap, @@ -1292,6 +1292,12 @@ static int bch2_vfs_write_inode(struct inode *vinode, return ret; } +static int bch2_drop_inode(struct inode *vinode) +{ + + return generic_drop_inode(vinode); +} + static void bch2_evict_inode(struct inode *vinode) { struct bch_fs *c = vinode->i_sb->s_fs_info; @@ -1496,6 +1502,7 @@ static const struct super_operations bch_super_operations = { .alloc_inode = bch2_alloc_inode, .destroy_inode = bch2_destroy_inode, .write_inode = bch2_vfs_write_inode, + .drop_inode = bch2_drop_inode, .evict_inode = bch2_evict_inode, .sync_fs = bch2_sync_fs, .statfs = bch2_statfs, diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 40898c4d197b..2616b15eb51c 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -183,7 +183,7 @@ int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, int bch2_setattr_nonsize(struct mnt_idmap *, struct bch_inode_info *, struct iattr *); -int __bch2_unlink(struct inode *, struct dentry *, int); +int __bch2_unlink(struct inode *, struct dentry *, bool); void bch2_vfs_exit(void); int bch2_vfs_init(void); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index a61d380a47b6..6b3eecdef81a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -256,7 +256,7 @@ retry: /* Subvolume root? 
*/ if (inode_u.bi_subvol) { - ret = bch2_subvolume_delete(trans, inode_u.bi_subvol, -1); + ret = bch2_subvolume_delete(trans, inode_u.bi_subvol); if (ret) goto err; } @@ -992,12 +992,28 @@ static int check_subvols(struct bch_fs *c) struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + struct bkey_s_c_subvolume subvol; int ret; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { + if (k.k->type != KEY_TYPE_subvolume) + continue; + + subvol = bkey_s_c_to_subvolume(k); + + if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW, + bch2_subvolume_delete(&trans, iter.pos.offset)); + if (ret) { + bch_err(c, "error deleting subvolume %llu: %i", + iter.pos.offset, ret); + break; + } + } } bch2_trans_iter_exit(&trans, &iter); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 7fccf842a46b..3ae321a99cee 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -709,11 +709,7 @@ retry: bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); /* Subvolume root? 
*/ - if (inode_u.bi_subvol) { - ret = bch2_subvolume_delete(&trans, inode_u.bi_subvol, -1); - if (ret) - goto err; - } + BUG_ON(inode_u.bi_subvol); bkey_inode_generation_init(&delete.k_i); delete.k.p = iter.pos; diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 9bd8d61c96fe..58cda98989b1 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -4,6 +4,7 @@ #include "btree_key_cache.h" #include "btree_update.h" #include "error.h" +#include "fs.h" #include "subvolume.h" /* Snapshot tree: */ @@ -541,13 +542,6 @@ err: return ret; } -/* List of snapshot IDs that are being deleted: */ -struct snapshot_id_list { - u32 nr; - u32 size; - u32 *d; -}; - static bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id) { unsigned i; @@ -819,9 +813,11 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol, return ret; } -/* XXX: mark snapshot id for deletion, walk btree and delete: */ -int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid, - int deleting_snapshot) +/* + * Delete subvolume, mark snapshot ID as deleted, queue up snapshot + * deletion/cleanup: + */ +int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { struct btree_iter iter; struct bkey_s_c k; @@ -849,12 +845,6 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid, subvol = bkey_s_c_to_subvolume(k); snapid = le32_to_cpu(subvol.v->snapshot); - if (deleting_snapshot >= 0 && - deleting_snapshot != BCH_SUBVOLUME_SNAP(subvol.v)) { - ret = -ENOENT; - goto err; - } - delete = bch2_trans_kmalloc(trans, sizeof(*delete)); ret = PTR_ERR_OR_ZERO(delete); if (ret) @@ -880,6 +870,163 @@ err: return ret; } +static void bch2_evict_subvolume_inodes(struct bch_fs *c, + struct snapshot_id_list *s) +{ + struct super_block *sb = c->vfs_sb; + struct inode *inode; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || + (inode->i_state & 
I_FREEING)) + continue; + + d_mark_dontcache(inode); + d_prune_aliases(inode); + } + spin_unlock(&sb->s_inode_list_lock); +again: + cond_resched(); + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || + (inode->i_state & I_FREEING)) + continue; + + if (!(inode->i_state & I_DONTCACHE)) { + d_mark_dontcache(inode); + d_prune_aliases(inode); + } + + spin_lock(&inode->i_lock); + if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) && + !(inode->i_state & I_FREEING)) { + wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW); + DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + schedule(); + finish_wait(wq, &wait.wq_entry); + goto again; + } + + spin_unlock(&inode->i_lock); + } + spin_unlock(&sb->s_inode_list_lock); +} + +void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) +{ + struct bch_fs *c = container_of(work, struct bch_fs, + snapshot_wait_for_pagecache_and_delete_work); + struct snapshot_id_list s; + u32 *id; + int ret = 0; + + while (!ret) { + mutex_lock(&c->snapshots_unlinked_lock); + s = c->snapshots_unlinked; + memset(&c->snapshots_unlinked, 0, sizeof(c->snapshots_unlinked)); + mutex_unlock(&c->snapshots_unlinked_lock); + + if (!s.nr) + break; + + bch2_evict_subvolume_inodes(c, &s); + + for (id = s.d; id < s.d + s.nr; id++) { + ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_subvolume_delete(&trans, *id)); + if (ret) { + bch_err(c, "error %i deleting subvolume %u", ret, *id); + break; + } + } + + kfree(s.d); + } + + percpu_ref_put(&c->writes); +} + +struct subvolume_unlink_hook { + struct btree_trans_commit_hook h; + u32 subvol; +}; + +int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, + struct btree_trans_commit_hook *_h) +{ + struct 
subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); + struct bch_fs *c = trans->c; + int ret = 0; + + mutex_lock(&c->snapshots_unlinked_lock); + if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) + ret = snapshot_id_add(&c->snapshots_unlinked, h->subvol); + mutex_unlock(&c->snapshots_unlinked_lock); + + if (ret) + return ret; + + if (unlikely(!percpu_ref_tryget(&c->writes))) + return -EROFS; + + if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) + percpu_ref_put(&c->writes); + return 0; +} + +int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i_subvolume *n; + struct subvolume_unlink_hook *h; + int ret = 0; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, + POS(0, subvolid), + BTREE_ITER_CACHED| + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_subvolume) { + bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid); + ret = -EIO; + goto err; + } + + n = bch2_trans_kmalloc(trans, sizeof(*n)); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + goto err; + + bkey_reassemble(&n->k_i, k); + SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); + + ret = bch2_trans_update(trans, &iter, &n->k_i, 0); + if (ret) + goto err; + + h = bch2_trans_kmalloc(trans, sizeof(*h)); + ret = PTR_ERR_OR_ZERO(h); + if (ret) + goto err; + + h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook; + h->subvol = subvolid; + bch2_trans_commit_hook(trans, &h->h); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + int bch2_subvolume_create(struct btree_trans *trans, u64 inode, u32 src_subvolid, u32 *new_subvolid, @@ -977,5 +1124,8 @@ err: int bch2_fs_subvolumes_init(struct bch_fs *c) { INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); + INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work, + 
bch2_subvolume_wait_for_pagecache_and_delete); + mutex_init(&c->snapshots_unlinked_lock); return 0; } diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index f98c8c0dbea2..45234c9de0f6 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_SUBVOLUME_H #define _BCACHEFS_SUBVOLUME_H +#include "subvolume_types.h" + void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); const char *bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c); @@ -108,7 +110,8 @@ int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); -int bch2_subvolume_delete(struct btree_trans *, u32, int); +int bch2_subvolume_delete(struct btree_trans *, u32); +int bch2_subvolume_unlink(struct btree_trans *, u32); int bch2_subvolume_create(struct btree_trans *, u64, u32, u32 *, u32 *, bool); diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h new file mode 100644 index 000000000000..9410b9587591 --- /dev/null +++ b/fs/bcachefs/subvolume_types.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SUBVOLUME_TYPES_H +#define _BCACHEFS_SUBVOLUME_TYPES_H + +struct snapshot_id_list { + u32 nr; + u32 size; + u32 *d; +}; + +#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */ |