summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-31 10:18:00 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-31 10:18:00 -0800
commit20c59c71ae711aff845eef640b25935bc9578c93 (patch)
treebba42c29760903c293fdd2e76c2a5ed078500820 /fs/xfs/scrub
parent5a87e37ee0943afe11504299e4b87d2e4d8d88d5 (diff)
parent1e369b0e199bbfbab5218e1c1443d839700d8884 (diff)
Merge tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "This merge cycle, we again have some substantive changes to XFS. Metadata verifiers have been restructured to provide more detail about which part of a metadata structure failed checks, and we've enhanced the new online fsck feature to cross-reference extent allocation information with the other metadata structures. With this pull, the metadata verification part of online fsck is more or less finished, though the feature is still experimental and still disabled by default. We're also preparing to remove the EXPERIMENTAL tag from a couple of features this cycle. This week we're committing a bunch of space accounting fixes for reflink and removing the EXPERIMENTAL tag from reflink; I anticipate that we'll be ready to do the same for the reverse mapping feature next week. (I don't have any pending fixes for rmap; however I wish to remove the tags one at a time.) This giant pile of patches has been run through a full xfstests run over the weekend and through a quick xfstests run against this morning's master, with no major failures reported. Let me know if there are any merge problems -- git merge reported that one of our patches touched the same function as the i_version series, but it resolved things cleanly. Summary: - Log faulting code locations when verifiers fail, for improved diagnosis of corrupt filesystems. - Implement metadata verifiers for local format inode fork data. - Online scrub now cross-references metadata records with other metadata. - Refactor the fs geometry ioctl generation functions. - Harden various metadata verifiers. - Fix various accounting problems. - Fix uncancelled transactions leaking when xattr functions fail. - Prevent the copy-on-write speculative preallocation garbage collector from racing with writeback. - Emit log reservation type information as trace data so that we can compare against xfsprogs. - Fix some erroneous asserts in the online scrub code. 
- Clean up the transaction reservation calculations. - Fix various minor bugs in online scrub. - Log complaints about mixed dio/buffered writes once per day and less noisily than before. - Refactor buffer log item lists to use list_head. - Break PNFS leases before reflinking blocks. - Reduce lock contention on reflink source files. - Fix some quota accounting problems with reflink. - Fix a serious corruption problem in the direct cow write code where we fed bad iomaps to the vfs iomap consumers. - Various other refactorings. - Remove EXPERIMENTAL tag from reflink!" * tag 'xfs-4.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (94 commits) xfs: remove experimental tag for reflinks xfs: don't screw up direct writes when freesp is fragmented xfs: check reflink allocation mappings iomap: warn on zero-length mappings xfs: treat CoW fork operations as delalloc for quota accounting xfs: only grab shared inode locks for source file during reflink xfs: allow xfs_lock_two_inodes to take different EXCL/SHARED modes xfs: reflink should break pnfs leases before sharing blocks xfs: don't clobber inobt/finobt cursors when xref with rmap xfs: skip CoW writes past EOF when writeback races with truncate xfs: preserve i_rdev when recycling a reclaimable inode xfs: refactor accounting updates out of xfs_bmap_btalloc xfs: refactor inode verifier corruption error printing xfs: make tracepoint inode number format consistent xfs: always zero di_flags2 when we free the inode xfs: call xfs_qm_dqattach before performing reflink operations xfs: bmap code cleanup Use list_head infra-structure for buffer's log items list Split buffer's b_fspriv field Get rid of xfs_buf_log_item_t typedef ...
Diffstat (limited to 'fs/xfs/scrub')
-rw-r--r--fs/xfs/scrub/agheader.c340
-rw-r--r--fs/xfs/scrub/alloc.c81
-rw-r--r--fs/xfs/scrub/bmap.c219
-rw-r--r--fs/xfs/scrub/btree.c184
-rw-r--r--fs/xfs/scrub/btree.h9
-rw-r--r--fs/xfs/scrub/common.c255
-rw-r--r--fs/xfs/scrub/common.h23
-rw-r--r--fs/xfs/scrub/dabtree.c22
-rw-r--r--fs/xfs/scrub/dir.c44
-rw-r--r--fs/xfs/scrub/ialloc.c194
-rw-r--r--fs/xfs/scrub/inode.c178
-rw-r--r--fs/xfs/scrub/parent.c8
-rw-r--r--fs/xfs/scrub/quota.c7
-rw-r--r--fs/xfs/scrub/refcount.c420
-rw-r--r--fs/xfs/scrub/rmap.c123
-rw-r--r--fs/xfs/scrub/rtbitmap.c35
-rw-r--r--fs/xfs/scrub/scrub.c203
-rw-r--r--fs/xfs/scrub/scrub.h37
-rw-r--r--fs/xfs/scrub/trace.h44
19 files changed, 2237 insertions, 189 deletions
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 2a9b4f9e93c6..fd975524f460 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -32,30 +32,17 @@
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
/*
- * Set up scrub to check all the static metadata in each AG.
- * This means the SB, AGF, AGI, and AGFL headers.
+ * Walk all the blocks in the AGFL. The fn function can return any negative
+ * error code or XFS_BTREE_QUERY_RANGE_ABORT.
*/
int
-xfs_scrub_setup_ag_header(
- struct xfs_scrub_context *sc,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = sc->mp;
-
- if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
- sc->sm->sm_ino || sc->sm->sm_gen)
- return -EINVAL;
- return xfs_scrub_setup_fs(sc, ip);
-}
-
-/* Walk all the blocks in the AGFL. */
-int
xfs_scrub_walk_agfl(
struct xfs_scrub_context *sc,
int (*fn)(struct xfs_scrub_context *,
@@ -115,6 +102,36 @@ xfs_scrub_walk_agfl(
/* Superblock */
+/*
+ * Cross-reference the superblock's own block with the other btrees.
+ *
+ * Looks at block XFS_SB_BLOCK(mp) of AG sc->sm->sm_agno: after setting up
+ * sc->sa with xfs_scrub_ag_init(), it runs the used-space, not-inode-chunk,
+ * owned-by (XFS_RMAP_OWN_FS) and not-shared xref helpers on that one block.
+ * Returns early, checking nothing, if XFS_SCRUB_OFLAG_CORRUPT is already set
+ * or if xfs_scrub_xref_process_error() rejects the setup error. The bp
+ * argument is not referenced in this function.
+ */
+STATIC void
+xfs_scrub_superblock_xref(
+ struct xfs_scrub_context *sc,
+ struct xfs_buf *bp)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agnumber_t agno = sc->sm->sm_agno;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agbno = XFS_SB_BLOCK(mp);
+
+ error = xfs_scrub_ag_init(sc, agno, &sc->sa);
+ if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+ /* scrub teardown will take care of sc->sa for us */
+}
+
+
/*
* Scrub the filesystem superblock.
*
@@ -143,6 +160,22 @@ xfs_scrub_superblock(
error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+ /*
+ * The superblock verifier can return several different error codes
+ * if it thinks the superblock doesn't look right. For a mount these
+ * would all get bounced back to userspace, but if we're here then the
+ * fs mounted successfully, which means that this secondary superblock
+ * is simply incorrect. Treat all these codes the same way we treat
+ * any corruption.
+ */
+ switch (error) {
+ case -EINVAL: /* also -EWRONGFS */
+ case -ENOSYS:
+ case -EFBIG:
+ error = -EFSCORRUPTED;
+ default:
+ break;
+ }
if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
return error;
@@ -387,11 +420,175 @@ xfs_scrub_superblock(
BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
xfs_scrub_block_set_corrupt(sc, bp);
+ xfs_scrub_superblock_xref(sc, bp);
+
return error;
}
/* AGF */
+/*
+ * Tally freespace record lengths.
+ *
+ * Record callback handed to xfs_alloc_query_all() by
+ * xfs_scrub_agf_xref_freeblks(). priv points to an xfs_extlen_t running
+ * total, to which this record's ar_blockcount is added. Always returns 0.
+ */
+STATIC int
+xfs_scrub_agf_record_bno_lengths(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv)
+{
+ xfs_extlen_t *blocks = priv;
+
+ (*blocks) += rec->ar_blockcount;
+ return 0;
+}
+
+
+/*
+ * Check agf_freeblks.
+ *
+ * Sums the length of every record that xfs_alloc_query_all() reports for the
+ * sc->sa.bno_cur cursor and flags an xref corruption on the AGF buffer when
+ * the sum differs from the on-disk agf_freeblks. Skipped when there is no
+ * bno_cur cursor, or when xfs_scrub_should_check_xref() says not to continue
+ * after the query.
+ */
+static inline void
+xfs_scrub_agf_xref_freeblks(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ xfs_extlen_t blocks = 0;
+ int error;
+
+ if (!sc->sa.bno_cur)
+ return;
+
+ error = xfs_alloc_query_all(sc->sa.bno_cur,
+ xfs_scrub_agf_record_bno_lengths, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+ return;
+ if (blocks != be32_to_cpu(agf->agf_freeblks))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+
+/*
+ * Cross reference the AGF with the cntbt (freespace by length btree).
+ *
+ * Does an xfs_alloc_lookup_le() on the cntbt cursor with block 0 and length
+ * -1U. If the lookup finds nothing, agf_freeblks must be zero; otherwise
+ * the length of the record at the cursor must equal agf_longest. Any
+ * mismatch is flagged as an xref corruption on the AGF buffer. Skipped when
+ * there is no cntbt cursor or when xfs_scrub_should_check_xref() says not to
+ * continue.
+ */
+static inline void
+xfs_scrub_agf_xref_cntbt(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ xfs_agblock_t agbno;
+ xfs_extlen_t blocks;
+ int have;
+ int error;
+
+ if (!sc->sa.cnt_cur)
+ return;
+
+ /* Any freespace at all? */
+ error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+ return;
+ if (!have) {
+ /*
+ * agf_freeblks is stored big-endian on disk, so the constant is
+ * converted CPU-to-disk with cpu_to_be32(), not be32_to_cpu().
+ */
+ if (agf->agf_freeblks != cpu_to_be32(0))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+ return;
+ }
+
+ /* Check agf_longest */
+ error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+ return;
+ if (!have || blocks != be32_to_cpu(agf->agf_longest))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+
+/*
+ * Check the btree block counts in the AGF against the btrees.
+ *
+ * agf_rmap_blocks must equal the block count of the rmapbt (when an rmapbt
+ * cursor exists). agf_btreeblks must equal the sum, over the rmapbt (if
+ * any), bnobt and cntbt, of each tree's block count minus one. The
+ * agf_btreeblks check is skipped if the fs has the rmapbt feature but no
+ * rmapbt cursor, or if either free space btree cursor is missing.
+ */
+STATIC void
+xfs_scrub_agf_xref_btreeblks(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ struct xfs_mount *mp = sc->mp;
+ xfs_agblock_t blocks;
+ xfs_agblock_t btreeblks;
+ int error;
+
+ /* Check agf_rmap_blocks; set up for agf_btreeblks check */
+ if (sc->sa.rmap_cur) {
+ error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ btreeblks = blocks - 1;
+ if (blocks != be32_to_cpu(agf->agf_rmap_blocks))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+ } else {
+ btreeblks = 0;
+ }
+
+ /*
+ * No rmap cursor; we can't xref if we have the rmapbt feature.
+ * We also can't do it if we're missing the free space btree cursors.
+ */
+ if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) ||
+ !sc->sa.bno_cur || !sc->sa.cnt_cur)
+ return;
+
+ /* Check agf_btreeblks */
+ error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+ return;
+ btreeblks += blocks - 1;
+
+ error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+ return;
+ btreeblks += blocks - 1;
+
+ if (btreeblks != be32_to_cpu(agf->agf_btreeblks))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+
+/*
+ * Check agf_refcount_blocks against tree size.
+ *
+ * Counts the blocks reachable from sc->sa.refc_cur with
+ * xfs_btree_count_blocks() and flags an xref corruption on the AGF buffer if
+ * the count differs from agf_refcount_blocks. Skipped when there is no
+ * refc_cur cursor.
+ */
+static inline void
+xfs_scrub_agf_xref_refcblks(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ xfs_agblock_t blocks;
+ int error;
+
+ if (!sc->sa.refc_cur)
+ return;
+
+ error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (blocks != be32_to_cpu(agf->agf_refcount_blocks))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+
+/*
+ * Cross-reference the AGF with the other btrees.
+ *
+ * After setting up the AG btree cursors with xfs_scrub_ag_btcur_init(),
+ * checks the AGF block itself (XFS_AGF_BLOCK) with the used-space,
+ * not-inode-chunk, owned-by (XFS_RMAP_OWN_FS) and not-shared xref helpers,
+ * and checks the AGF's freeblks, longest, btreeblks/rmap_blocks and
+ * refcount_blocks counters via the xfs_scrub_agf_xref_* helpers. Does
+ * nothing if XFS_SCRUB_OFLAG_CORRUPT is already set or if cursor setup
+ * returns an error.
+ */
+STATIC void
+xfs_scrub_agf_xref(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agbno = XFS_AGF_BLOCK(mp);
+
+ error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+ if (error)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_agf_xref_freeblks(sc);
+ xfs_scrub_agf_xref_cntbt(sc);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_agf_xref_btreeblks(sc);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+ xfs_scrub_agf_xref_refcblks(sc);
+
+ /* scrub teardown will take care of sc->sa for us */
+}
+
+
/* Scrub the AGF. */
int
xfs_scrub_agf(
@@ -414,6 +611,7 @@ xfs_scrub_agf(
&sc->sa.agf_bp, &sc->sa.agfl_bp);
if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp);
agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -470,6 +668,7 @@ xfs_scrub_agf(
if (agfl_count != 0 && fl_count != agfl_count)
xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+ xfs_scrub_agf_xref(sc);
out:
return error;
}
@@ -477,11 +676,28 @@ out:
/* AGFL */
/*
 * State handed to the AGFL walk as its private pointer (xfs_scrub_agfl()
 * passes &sai to xfs_scrub_walk_agfl()).
 *
 * NOTE(review): xfs_scrub_agfl_block() forwards its priv pointer to
 * xfs_scrub_agfl_block_xref() where a struct xfs_owner_info pointer is
 * expected, which presumably relies on oinfo staying the first member of
 * this structure -- confirm before reordering fields.
 */
struct xfs_scrub_agfl_info {
+ struct xfs_owner_info oinfo; /* set to XFS_RMAP_OWN_AG before the walk */
unsigned int sz_entries;
unsigned int nr_entries;
xfs_agblock_t *entries;
};
+/*
+ * Cross-reference one AGFL entry with the other btrees.
+ *
+ * Runs the used-space, not-inode-chunk, owned-by (using the caller's oinfo)
+ * and not-shared xref helpers on the single block agbno. Does nothing if
+ * XFS_SCRUB_OFLAG_CORRUPT is already set.
+ */
+STATIC void
+xfs_scrub_agfl_block_xref(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ struct xfs_owner_info *oinfo)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+}
+
+
/* Scrub an AGFL block. */
STATIC int
xfs_scrub_agfl_block(
@@ -499,6 +715,8 @@ xfs_scrub_agfl_block(
else
xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp);
+ xfs_scrub_agfl_block_xref(sc, agbno, priv);
+
return 0;
}
@@ -513,6 +731,37 @@ xfs_scrub_agblock_cmp(
return (int)*a - (int)*b;
}
+/*
+ * Cross-reference the AGFL header block with the other btrees.
+ *
+ * After setting up the AG btree cursors with xfs_scrub_ag_btcur_init(),
+ * runs the used-space, not-inode-chunk, owned-by (XFS_RMAP_OWN_FS) and
+ * not-shared xref helpers on block XFS_AGFL_BLOCK(mp). Does nothing if
+ * XFS_SCRUB_OFLAG_CORRUPT is already set or if cursor setup returns an
+ * error.
+ */
+STATIC void
+xfs_scrub_agfl_xref(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agbno = XFS_AGFL_BLOCK(mp);
+
+ error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+ if (error)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+ /*
+ * Scrub teardown will take care of sc->sa for us. Leave sc->sa
+ * active so that the agfl block xref can use it too.
+ */
+}
+
+
/* Scrub the AGFL. */
int
xfs_scrub_agfl(
@@ -532,6 +781,12 @@ xfs_scrub_agfl(
goto out;
if (!sc->sa.agf_bp)
return -EFSCORRUPTED;
+ xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp);
+
+ xfs_scrub_agfl_xref(sc);
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ goto out;
/* Allocate buffer to ensure uniqueness of AGFL entries. */
agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -548,6 +803,7 @@ xfs_scrub_agfl(
}
/* Check the blocks in the AGFL. */
+ xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG);
error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai);
if (error)
goto out_free;
@@ -575,6 +831,56 @@ out:
/* AGI */
+/*
+ * Check agi_count/agi_freecount.
+ *
+ * Asks xfs_ialloc_count_inodes() for the inode and free inode counts via
+ * the sc->sa.ino_cur cursor and flags an xref corruption on the AGI buffer
+ * if either differs from the on-disk agi_count or agi_freecount. Skipped
+ * when there is no ino_cur cursor or when xfs_scrub_should_check_xref()
+ * says not to continue.
+ */
+static inline void
+xfs_scrub_agi_xref_icounts(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+ xfs_agino_t icount;
+ xfs_agino_t freecount;
+ int error;
+
+ if (!sc->sa.ino_cur)
+ return;
+
+ error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur))
+ return;
+ if (be32_to_cpu(agi->agi_count) != icount ||
+ be32_to_cpu(agi->agi_freecount) != freecount)
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp);
+}
+
+
+/*
+ * Cross-reference the AGI with the other btrees.
+ *
+ * After setting up the AG btree cursors with xfs_scrub_ag_btcur_init(),
+ * runs the used-space, not-inode-chunk, owned-by (XFS_RMAP_OWN_FS) and
+ * not-shared xref helpers on block XFS_AGI_BLOCK(mp), and checks the AGI
+ * inode counters via xfs_scrub_agi_xref_icounts(). Does nothing if
+ * XFS_SCRUB_OFLAG_CORRUPT is already set or if cursor setup returns an
+ * error.
+ */
+STATIC void
+xfs_scrub_agi_xref(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agbno = XFS_AGI_BLOCK(mp);
+
+ error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+ if (error)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_scrub_agi_xref_icounts(sc);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+ /* scrub teardown will take care of sc->sa for us */
+}
+
+
/* Scrub the AGI. */
int
xfs_scrub_agi(
@@ -598,6 +904,7 @@ xfs_scrub_agi(
&sc->sa.agf_bp, &sc->sa.agfl_bp);
if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp);
agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
@@ -653,6 +960,7 @@ xfs_scrub_agi(
if (agi->agi_pad32 != cpu_to_be32(0))
xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+ xfs_scrub_agi_xref(sc);
out:
return error;
}
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 059663e13414..517c079d3f68 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -31,6 +31,7 @@
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
+#include "xfs_alloc.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -49,6 +50,64 @@ xfs_scrub_setup_ag_allocbt(
}
/* Free space btree scrubber. */
+/*
+ * Ensure there's a corresponding cntbt/bnobt record matching this
+ * bnobt/cntbt record, respectively.
+ *
+ * When scrubbing the bnobt (sm_type == XFS_SCRUB_TYPE_BNOBT) the cntbt
+ * cursor is consulted; otherwise the bnobt cursor is. The other tree must
+ * yield a record whose start block and length equal agbno and len exactly;
+ * a lookup that finds nothing, a missing record, or any mismatch is flagged
+ * as an xref corruption on that other cursor. Skipped when the other cursor
+ * is NULL or when xfs_scrub_should_check_xref() says not to continue.
+ */
+STATIC void
+xfs_scrub_allocbt_xref_other(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ struct xfs_btree_cur **pcur;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ int has_otherrec;
+ int error;
+
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
+ pcur = &sc->sa.cnt_cur;
+ else
+ pcur = &sc->sa.bno_cur;
+ if (!*pcur)
+ return;
+
+ error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec);
+ if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+ return;
+ if (!has_otherrec) {
+ xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+ return;
+ }
+
+ error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec);
+ if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+ return;
+ if (!has_otherrec) {
+ xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+ return;
+ }
+
+ if (fbno != agbno || flen != len)
+ xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+}
+
+
+/*
+ * Cross-reference a free space record with the other btrees.
+ *
+ * For the extent (agbno, len) taken from a bnobt/cntbt record, checks the
+ * other free space btree via xfs_scrub_allocbt_xref_other() and then runs
+ * the not-inode-chunk, has-no-owner and not-shared xref helpers. Does
+ * nothing if XFS_SCRUB_OFLAG_CORRUPT is already set.
+ */
+STATIC void
+xfs_scrub_allocbt_xref(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_allocbt_xref_other(sc, agbno, len);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+ xfs_scrub_xref_has_no_owner(sc, agbno, len);
+ xfs_scrub_xref_is_not_shared(sc, agbno, len);
+}
/* Scrub a bnobt/cntbt record. */
STATIC int
@@ -70,6 +129,8 @@ xfs_scrub_allocbt_rec(
!xfs_verify_agbno(mp, agno, bno + len - 1))
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ xfs_scrub_allocbt_xref(bs->sc, bno, len);
+
return error;
}
@@ -100,3 +161,23 @@ xfs_scrub_cntbt(
{
return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
}
+
+/*
+ * xref check that the extent is not free.
+ *
+ * Calls xfs_alloc_has_record() on the sc->sa.bno_cur cursor for
+ * (agbno, len) and flags an xref corruption on that cursor if it reports
+ * free space there. Skipped when there is no bno_cur cursor or when
+ * xfs_scrub_should_check_xref() says not to continue.
+ */
+void
+xfs_scrub_xref_is_used_space(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ bool is_freesp;
+ int error;
+
+ if (!sc->sa.bno_cur)
+ return;
+
+ error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+ return;
+ if (is_freesp)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0);
+}
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 42fec0bcd9e1..d00282130492 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -37,6 +37,7 @@
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
+#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -99,6 +100,201 @@ struct xfs_scrub_bmap_info {
int whichfork;
};
+/*
+ * Look for a corresponding rmap for this irec.
+ *
+ * Looks up agbno in the rmapbt cursor with the given owner, using offset 0
+ * for the CoW fork and irec->br_startoff otherwise, and with
+ * XFS_RMAP_ATTR_FORK set for attr fork extents. When a record is found it
+ * is stored in *rmap. If the lookup completes without finding a record,
+ * the file block is flagged as an xref corruption. Returns true only when
+ * a record was found; returns false without flagging if
+ * xfs_scrub_should_check_xref() says to stop.
+ */
+static inline bool
+xfs_scrub_bmap_get_rmap(
+ struct xfs_scrub_bmap_info *info,
+ struct xfs_bmbt_irec *irec,
+ xfs_agblock_t agbno,
+ uint64_t owner,
+ struct xfs_rmap_irec *rmap)
+{
+ xfs_fileoff_t offset;
+ unsigned int rflags = 0;
+ int has_rmap;
+ int error;
+
+ if (info->whichfork == XFS_ATTR_FORK)
+ rflags |= XFS_RMAP_ATTR_FORK;
+
+ /*
+ * CoW staging extents are owned (on disk) by the refcountbt, so
+ * their rmaps do not have offsets.
+ */
+ if (info->whichfork == XFS_COW_FORK)
+ offset = 0;
+ else
+ offset = irec->br_startoff;
+
+ /*
+ * If the caller thinks this could be a shared bmbt extent (IOWs,
+ * any data fork extent of a reflink inode) then we have to use the
+ * range rmap lookup to make sure we get the correct owner/offset.
+ */
+ if (info->is_shared) {
+ error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
+ owner, offset, rflags, rmap, &has_rmap);
+ if (!xfs_scrub_should_check_xref(info->sc, &error,
+ &info->sc->sa.rmap_cur))
+ return false;
+ goto out;
+ }
+
+ /*
+ * Otherwise, use the (faster) regular lookup.
+ */
+ error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
+ offset, rflags, &has_rmap);
+ if (!xfs_scrub_should_check_xref(info->sc, &error,
+ &info->sc->sa.rmap_cur))
+ return false;
+ if (!has_rmap)
+ goto out;
+
+ error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
+ if (!xfs_scrub_should_check_xref(info->sc, &error,
+ &info->sc->sa.rmap_cur))
+ return false;
+
+out:
+ if (!has_rmap)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+ return has_rmap;
+}
+
+
+/*
+ * Make sure that we have rmapbt records for this extent.
+ *
+ * The expected owner is XFS_RMAP_OWN_COW for the CoW fork and the inode
+ * number otherwise. Once xfs_scrub_bmap_get_rmap() has produced a record,
+ * the file block is flagged as an xref corruption if the record does not
+ * span the extent's physical range, does not span its logical range (not
+ * checked for the CoW fork), has a different owner, lacks
+ * XFS_RMAP_UNWRITTEN for an unwritten non-CoW extent, lacks
+ * XFS_RMAP_ATTR_FORK for an attr fork extent, or has XFS_RMAP_BMBT_BLOCK
+ * set. Skipped when there is no rmapbt cursor.
+ */
+STATIC void
+xfs_scrub_bmap_xref_rmap(
+ struct xfs_scrub_bmap_info *info,
+ struct xfs_bmbt_irec *irec,
+ xfs_agblock_t agbno)
+{
+ struct xfs_rmap_irec rmap;
+ unsigned long long rmap_end;
+ uint64_t owner;
+
+ if (!info->sc->sa.rmap_cur)
+ return;
+
+ if (info->whichfork == XFS_COW_FORK)
+ owner = XFS_RMAP_OWN_COW;
+ else
+ owner = info->sc->ip->i_ino;
+
+ /* Find the rmap record for this irec. */
+ if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap))
+ return;
+
+ /* Check the rmap. */
+ rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
+ if (rmap.rm_startblock > agbno ||
+ agbno + irec->br_blockcount > rmap_end)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ /*
+ * Check the logical offsets if applicable. CoW staging extents
+ * don't track logical offsets since the mappings only exist in
+ * memory.
+ */
+ if (info->whichfork != XFS_COW_FORK) {
+ rmap_end = (unsigned long long)rmap.rm_offset +
+ rmap.rm_blockcount;
+ if (rmap.rm_offset > irec->br_startoff ||
+ irec->br_startoff + irec->br_blockcount > rmap_end)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc,
+ info->whichfork, irec->br_startoff);
+ }
+
+ if (rmap.rm_owner != owner)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ /*
+ * Check for discrepancies between the unwritten flag in the irec and
+ * the rmap. Note that the (in-memory) CoW fork distinguishes between
+ * unwritten and written extents, but we don't track that in the rmap
+ * records because the blocks are owned (on-disk) by the refcountbt,
+ * which doesn't track unwritten state.
+ */
+ if (owner != XFS_RMAP_OWN_COW &&
+ irec->br_state == XFS_EXT_UNWRITTEN &&
+ !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ if (info->whichfork == XFS_ATTR_FORK &&
+ !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+ if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+}
+
+
+/*
+ * Cross-reference a single rtdev extent record.
+ *
+ * Passes the extent's start block and length to
+ * xfs_scrub_xref_is_used_rt_space(). Does nothing if
+ * XFS_SCRUB_OFLAG_CORRUPT is already set. The ip and cur arguments are not
+ * referenced in this function.
+ */
+STATIC void
+xfs_scrub_bmap_rt_extent_xref(
+ struct xfs_scrub_bmap_info *info,
+ struct xfs_inode *ip,
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *irec)
+{
+ if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock,
+ irec->br_blockcount);
+}
+
+
+/*
+ * Cross-reference a single datadev extent record.
+ *
+ * Sets up sc->sa for the AG containing irec->br_startblock, then runs the
+ * used-space and not-inode-chunk xref helpers and the rmap cross-reference
+ * on the extent. Data fork extents of non-reflink inodes and all attr fork
+ * extents are also checked with the not-shared helper; CoW fork extents
+ * are checked with the cow-staging helper. Does nothing if
+ * XFS_SCRUB_OFLAG_CORRUPT is already set. The ip and cur arguments are not
+ * referenced in this function.
+ *
+ * sc->sa is released with xfs_scrub_ag_free() on every path that called
+ * xfs_scrub_ag_init(), including the one where that setup failed, so that
+ * whatever a failed setup left behind is not carried into the next
+ * extent's setup.
+ * NOTE(review): this assumes xfs_scrub_ag_free() tolerates a partially
+ * initialized sc->sa, as scrub teardown appears to require -- confirm.
+ */
+STATIC void
+xfs_scrub_bmap_extent_xref(
+ struct xfs_scrub_bmap_info *info,
+ struct xfs_inode *ip,
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_mount *mp = info->sc->mp;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_extlen_t len;
+ int error;
+
+ if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+ agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
+ len = irec->br_blockcount;
+
+ error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa);
+ if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork,
+ irec->br_startoff, &error))
+ goto out_free;
+
+ xfs_scrub_xref_is_used_space(info->sc, agbno, len);
+ xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len);
+ xfs_scrub_bmap_xref_rmap(info, irec, agbno);
+ switch (info->whichfork) {
+ case XFS_DATA_FORK:
+ if (xfs_is_reflink_inode(info->sc->ip))
+ break;
+ /* fall through */
+ case XFS_ATTR_FORK:
+ xfs_scrub_xref_is_not_shared(info->sc, agbno,
+ irec->br_blockcount);
+ break;
+ case XFS_COW_FORK:
+ xfs_scrub_xref_is_cow_staging(info->sc, agbno,
+ irec->br_blockcount);
+ break;
+ }
+
+out_free:
+ xfs_scrub_ag_free(info->sc, &info->sc->sa);
+}
+
+
/* Scrub a single extent record. */
STATIC int
xfs_scrub_bmap_extent(
@@ -109,6 +305,7 @@ xfs_scrub_bmap_extent(
{
struct xfs_mount *mp = info->sc->mp;
struct xfs_buf *bp = NULL;
+ xfs_filblks_t end;
int error = 0;
if (cur)
@@ -136,19 +333,23 @@ xfs_scrub_bmap_extent(
irec->br_startoff);
/* Make sure the extent points to a valid place. */
+ if (irec->br_blockcount > MAXEXTLEN)
+ xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
+ end = irec->br_startblock + irec->br_blockcount - 1;
if (info->is_rt &&
(!xfs_verify_rtbno(mp, irec->br_startblock) ||
- !xfs_verify_rtbno(mp, irec->br_startblock +
- irec->br_blockcount - 1)))
+ !xfs_verify_rtbno(mp, end)))
xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
if (!info->is_rt &&
(!xfs_verify_fsbno(mp, irec->br_startblock) ||
- !xfs_verify_fsbno(mp, irec->br_startblock +
- irec->br_blockcount - 1)))
+ !xfs_verify_fsbno(mp, end) ||
+ XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
+ XFS_FSB_TO_AGNO(mp, end)))
xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
@@ -158,6 +359,11 @@ xfs_scrub_bmap_extent(
xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
+ if (info->is_rt)
+ xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec);
+ else
+ xfs_scrub_bmap_extent_xref(info, ip, cur, irec);
+
info->lastoff = irec->br_startoff + irec->br_blockcount;
return error;
}
@@ -235,7 +441,6 @@ xfs_scrub_bmap(
struct xfs_ifork *ifp;
xfs_fileoff_t endoff;
struct xfs_iext_cursor icur;
- bool found;
int error = 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -314,9 +519,7 @@ xfs_scrub_bmap(
/* Scrub extent records. */
info.lastoff = 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
- for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec);
- found != 0;
- found = xfs_iext_next_extent(ifp, &icur, &irec)) {
+ for_each_xfs_iext(ifp, &icur, &irec) {
if (xfs_scrub_should_terminate(sc, &error))
break;
if (isnullstartblock(irec.br_startblock))
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index df0766132ace..54218168c8f9 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -42,12 +42,14 @@
* Check for btree operation errors. See the section about handling
* operational errors in common.c.
*/
-bool
-xfs_scrub_btree_process_error(
+static bool
+__xfs_scrub_btree_process_error(
struct xfs_scrub_context *sc,
struct xfs_btree_cur *cur,
int level,
- int *error)
+ int *error,
+ __u32 errflag,
+ void *ret_ip)
{
if (*error == 0)
return true;
@@ -60,36 +62,80 @@ xfs_scrub_btree_process_error(
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
- sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ sc->sm->sm_flags |= errflag;
*error = 0;
/* fall through */
default:
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
- *error, __return_address);
+ *error, ret_ip);
else
trace_xfs_scrub_btree_op_error(sc, cur, level,
- *error, __return_address);
+ *error, ret_ip);
break;
}
return false;
}
+bool
+xfs_scrub_btree_process_error(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ int *error)
+{
+ return __xfs_scrub_btree_process_error(sc, cur, level, error,
+ XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
+xfs_scrub_btree_xref_process_error(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ int *error)
+{
+ return __xfs_scrub_btree_process_error(sc, cur, level, error,
+ XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
/* Record btree block corruption. */
-void
-xfs_scrub_btree_set_corrupt(
+static void
+__xfs_scrub_btree_set_corrupt(
struct xfs_scrub_context *sc,
struct xfs_btree_cur *cur,
- int level)
+ int level,
+ __u32 errflag,
+ void *ret_ip)
{
- sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ sc->sm->sm_flags |= errflag;
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
trace_xfs_scrub_ifork_btree_error(sc, cur, level,
- __return_address);
+ ret_ip);
else
trace_xfs_scrub_btree_error(sc, cur, level,
- __return_address);
+ ret_ip);
+}
+
+void
+xfs_scrub_btree_set_corrupt(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
+ __return_address);
+}
+
+void
+xfs_scrub_btree_xref_set_corrupt(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
+ __return_address);
}
/*
@@ -268,6 +314,8 @@ xfs_scrub_btree_block_check_sibling(
pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
goto out;
+ if (pbp)
+ xfs_scrub_buffer_recheck(bs->sc, pbp);
if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
@@ -315,6 +363,97 @@ out:
return error;
}
+struct check_owner { /* one deferred btree block owner check */
+ struct list_head list; /* links the entry into the scrubber's to_check list */
+ xfs_daddr_t daddr; /* XFS_BUF_ADDR() of the btree block's buffer */
+ int level; /* btree level the block was found at */
+};
+
+/*
+ * Make sure this btree block isn't in the free list and that there's
+ * an rmap record for it.
+ *
+ * daddr is converted to an AG number and AG block. For long-pointer
+ * btrees (XFS_BTREE_LONG_PTRS) sc->sa is set up for that AG here and
+ * released again before returning; otherwise the existing sc->sa is used.
+ * The block is then checked with the used-space and owned-by (bs->oinfo)
+ * xref helpers. Returns 0 immediately if bs->cur is NULL; otherwise
+ * returns the error left by xfs_scrub_btree_xref_process_error(), or 0.
+ *
+ * sc->sa is released with xfs_scrub_ag_free() even when the setup itself
+ * failed, so that whatever a failed setup left behind is not carried into
+ * the next block's setup.
+ * NOTE(review): this assumes xfs_scrub_ag_free() tolerates a partially
+ * initialized sc->sa, as scrub teardown appears to require -- confirm.
+ */
+STATIC int
+xfs_scrub_btree_check_block_owner(
+ struct xfs_scrub_btree *bs,
+ int level,
+ xfs_daddr_t daddr)
+{
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_btnum_t btnum;
+ bool init_sa;
+ int error = 0;
+
+ if (!bs->cur)
+ return 0;
+
+ btnum = bs->cur->bc_btnum;
+ agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
+ agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
+
+ init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
+ if (init_sa) {
+ error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa);
+ if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur,
+ level, &error))
+ goto out_free;
+ }
+
+ xfs_scrub_xref_is_used_space(bs->sc, agbno, 1);
+ /*
+ * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
+ * have to nullify it (to shut down further block owner checks) if
+ * self-xref encounters problems.
+ */
+ if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
+ bs->cur = NULL;
+
+ xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
+ if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
+ bs->cur = NULL;
+
+out_free:
+ if (init_sa)
+ xfs_scrub_ag_free(bs->sc, &bs->sc->sa);
+
+ return error;
+}
+
+
+/*
+ * Check the owner of a btree block.
+ *
+ * Returns 0 without checking when the btree is rooted in an inode
+ * (XFS_BTREE_ROOT_IN_INODE) and bp is NULL. For bnobt and rmapbt cursors
+ * the check is deferred: the buffer's daddr and the level are queued on
+ * bs->to_check for later processing. All other btrees are checked
+ * immediately via xfs_scrub_btree_check_block_owner(). Returns 0, -ENOMEM
+ * if the deferred entry cannot be allocated, or whatever the immediate
+ * check returns.
+ */
+STATIC int
+xfs_scrub_btree_check_owner(
+ struct xfs_scrub_btree *bs,
+ int level,
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ struct check_owner *co;
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
+ return 0;
+
+ /*
+ * We want to cross-reference each btree block with the bnobt
+ * and the rmapbt. We cannot cross-reference the bnobt or
+ * rmapbt while scanning the bnobt or rmapbt, respectively,
+ * because we cannot alter the cursor and we'd prefer not to
+ * duplicate cursors. Therefore, save the buffer daddr for
+ * later scanning.
+ */
+ if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
+ co = kmem_alloc(sizeof(struct check_owner),
+ KM_MAYFAIL | KM_NOFS);
+ if (!co)
+ return -ENOMEM;
+ co->level = level;
+ co->daddr = XFS_BUF_ADDR(bp);
+ list_add_tail(&co->list, &bs->to_check);
+ return 0;
+ }
+
+ return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
+}
+
+
/*
* Grab and scrub a btree block given a btree pointer. Returns block
* and buffer pointers (if applicable) if they're ok to use.
@@ -349,6 +488,16 @@ xfs_scrub_btree_get_block(
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
return 0;
}
+ if (*pbp)
+ xfs_scrub_buffer_recheck(bs->sc, *pbp);
+
+ /*
+ * Check the block's owner; this function absorbs error codes
+ * for us.
+ */
+ error = xfs_scrub_btree_check_owner(bs, level, *pbp);
+ if (error)
+ return error;
/*
* Check the block's siblings; this function absorbs error codes
@@ -421,6 +570,8 @@ xfs_scrub_btree(
struct xfs_btree_block *block;
int level;
struct xfs_buf *bp;
+ struct check_owner *co;
+ struct check_owner *n;
int i;
int error = 0;
@@ -512,5 +663,14 @@ xfs_scrub_btree(
}
out:
+ /* Process deferred owner checks on btree blocks. */
+ list_for_each_entry_safe(co, n, &bs.to_check, list) {
+ if (!error && bs.cur)
+ error = xfs_scrub_btree_check_block_owner(&bs,
+ co->level, co->daddr);
+ list_del(&co->list);
+ kmem_free(co);
+ }
+
return error;
}
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index 4de825a626d1..e2b868ede70b 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -26,10 +26,19 @@
bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc,
struct xfs_btree_cur *cur, int level, int *error);
+/* Check for btree xref operation errors. */
+bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur, int level,
+ int *error);
+
/* Check for btree corruption. */
void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc,
struct xfs_btree_cur *cur, int level);
+/* Check for btree xref discrepancies. */
+void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur, int level);
+
struct xfs_scrub_btree;
typedef int (*xfs_scrub_btree_rec_fn)(
struct xfs_scrub_btree *bs,
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index ac95fe911d96..8033ab9d8f47 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -78,12 +78,14 @@
*/
/* Check for operational errors. */
-bool
-xfs_scrub_process_error(
+static bool
+__xfs_scrub_process_error(
struct xfs_scrub_context *sc,
xfs_agnumber_t agno,
xfs_agblock_t bno,
- int *error)
+ int *error,
+ __u32 errflag,
+ void *ret_ip)
{
switch (*error) {
case 0:
@@ -95,24 +97,48 @@ xfs_scrub_process_error(
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
- sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ sc->sm->sm_flags |= errflag;
*error = 0;
/* fall through */
default:
trace_xfs_scrub_op_error(sc, agno, bno, *error,
- __return_address);
+ ret_ip);
break;
}
return false;
}
-/* Check for operational errors for a file offset. */
bool
-xfs_scrub_fblock_process_error(
+xfs_scrub_process_error(
+ struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ int *error)
+{
+ return __xfs_scrub_process_error(sc, agno, bno, error,
+ XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
+xfs_scrub_xref_process_error(
+ struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ int *error)
+{ /* xref variant: hands XFS_SCRUB_OFLAG_XFAIL and this caller's return address to the shared helper */
+ return __xfs_scrub_process_error(sc, agno, bno, error,
+ XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
+/* Check for operational errors for a file offset. */
+static bool
+__xfs_scrub_fblock_process_error(
struct xfs_scrub_context *sc,
int whichfork,
xfs_fileoff_t offset,
- int *error)
+ int *error,
+ __u32 errflag,
+ void *ret_ip)
{
switch (*error) {
case 0:
@@ -124,17 +150,39 @@ xfs_scrub_fblock_process_error(
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
- sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ sc->sm->sm_flags |= errflag;
*error = 0;
/* fall through */
default:
trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
- __return_address);
+ ret_ip);
break;
}
return false;
}
+bool
+xfs_scrub_fblock_process_error(
+ struct xfs_scrub_context *sc,
+ int whichfork,
+ xfs_fileoff_t offset,
+ int *error)
+{
+ return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
+ XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
+xfs_scrub_fblock_xref_process_error(
+ struct xfs_scrub_context *sc,
+ int whichfork,
+ xfs_fileoff_t offset,
+ int *error)
+{ /* xref variant: hands XFS_SCRUB_OFLAG_XFAIL and this caller's return address to the shared helper */
+ return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
+ XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
/*
* Handling scrub corruption/optimization/warning checks.
*
@@ -183,6 +231,16 @@ xfs_scrub_block_set_corrupt(
trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
}
+/*
+ * Record a corruption while cross-referencing.
+ *
+ * Sets XFS_SCRUB_OFLAG_XCORRUPT in sc->sm->sm_flags and emits the
+ * block-error tracepoint for bp->b_bn.  bp is dereferenced
+ * unconditionally here.
+ */
+void
+xfs_scrub_block_xref_set_corrupt(
+ struct xfs_scrub_context *sc,
+ struct xfs_buf *bp)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+ trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
+}
+
/*
* Record a corrupt inode. The trace data will include the block given
* by bp if bp is given; otherwise it will use the block location of the
@@ -198,6 +256,17 @@ xfs_scrub_ino_set_corrupt(
trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
}
+/*
+ * Record a corruption while cross-referencing with an inode.
+ *
+ * Sets XFS_SCRUB_OFLAG_XCORRUPT in sc->sm->sm_flags and emits the
+ * inode-error tracepoint for ino.  bp may be NULL, in which case a
+ * block number of 0 is traced instead of bp->b_bn.
+ */
+void
+xfs_scrub_ino_xref_set_corrupt(
+ struct xfs_scrub_context *sc,
+ xfs_ino_t ino,
+ struct xfs_buf *bp)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+ trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
+}
+
/* Record corruption in a block indexed by a file fork. */
void
xfs_scrub_fblock_set_corrupt(
@@ -209,6 +278,17 @@ xfs_scrub_fblock_set_corrupt(
trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
}
+/*
+ * Record a corruption while cross-referencing a fork block.
+ *
+ * Sets XFS_SCRUB_OFLAG_XCORRUPT in sc->sm->sm_flags and emits the
+ * fork-block-error tracepoint for the given fork and file offset.
+ */
+void
+xfs_scrub_fblock_xref_set_corrupt(
+ struct xfs_scrub_context *sc,
+ int whichfork,
+ xfs_fileoff_t offset)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+ trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
+}
+
/*
 * Warn about inodes that need administrative review but are not
 * incorrect.
@@ -245,6 +325,59 @@ xfs_scrub_set_incomplete(
}
/*
+ * rmap scrubbing -- compute the number of blocks with a given owner,
+ * at least according to the reverse mapping data.
+ */
+
+struct xfs_scrub_rmap_ownedby_info {
+ struct xfs_owner_info *oinfo; /* owner that rmap records are matched against */
+ xfs_filblks_t *blocks; /* caller's running total of matching blocks */
+};
+
+STATIC int
+xfs_scrub_count_rmap_ownedby_irec(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{ /* per-record callback; priv is a struct xfs_scrub_rmap_ownedby_info; always returns 0 */
+ struct xfs_scrub_rmap_ownedby_info *sroi = priv;
+ bool irec_attr;
+ bool oinfo_attr;
+
+ irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
+ oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
+
+ if (rec->rm_owner != sroi->oinfo->oi_owner) /* not the owner being counted */
+ return 0;
+
+ if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr) /* attr-fork flags only matter for inode owners */
+ (*sroi->blocks) += rec->rm_blockcount;
+
+ return 0;
+}
+
+/*
+ * Calculate the number of blocks the rmap thinks are owned by something.
+ * The caller should pass us an rmapbt cursor.
+ *
+ * *blocks is reset to zero before the walk and is then accumulated by
+ * xfs_scrub_count_rmap_ownedby_irec() for each record visited by
+ * xfs_rmap_query_all(); the return value is whatever that query returns.
+ * The sc argument is not referenced in this function.
+ */
+int
+xfs_scrub_count_rmap_ownedby_ag(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ struct xfs_owner_info *oinfo,
+ xfs_filblks_t *blocks)
+{
+ struct xfs_scrub_rmap_ownedby_info sroi;
+
+ sroi.oinfo = oinfo;
+ *blocks = 0;
+ sroi.blocks = blocks;
+
+ return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec,
+ &sroi);
+}
+
+/*
* AG scrubbing
*
* These helpers facilitate locking an allocation group's header
@@ -302,7 +435,7 @@ xfs_scrub_ag_read_headers(
error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
goto out;
-
+ error = 0;
out:
return error;
}
@@ -472,7 +605,7 @@ xfs_scrub_setup_ag_btree(
return error;
}
- error = xfs_scrub_setup_ag_header(sc, ip);
+ error = xfs_scrub_setup_fs(sc, ip);
if (error)
return error;
@@ -503,18 +636,11 @@ xfs_scrub_get_inode(
struct xfs_scrub_context *sc,
struct xfs_inode *ip_in)
{
+ struct xfs_imap imap;
struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip = NULL;
int error;
- /*
- * If userspace passed us an AG number or a generation number
- * without an inode number, they haven't got a clue so bail out
- * immediately.
- */
- if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino))
- return -EINVAL;
-
/* We want to scan the inode we already had opened. */
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
sc->ip = ip_in;
@@ -526,10 +652,33 @@ xfs_scrub_get_inode(
return -ENOENT;
error = xfs_iget(mp, NULL, sc->sm->sm_ino,
XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
- if (error == -ENOENT || error == -EINVAL) {
- /* inode doesn't exist... */
- return -ENOENT;
- } else if (error) {
+ switch (error) {
+ case -ENOENT:
+ /* Inode doesn't exist, just bail out. */
+ return error;
+ case 0:
+ /* Got an inode, continue. */
+ break;
+ case -EINVAL:
+ /*
+ * -EINVAL with IGET_UNTRUSTED could mean one of several
+ * things: userspace gave us an inode number that doesn't
+ * correspond to fs space, or doesn't have an inobt entry;
+ * or it could simply mean that the inode buffer failed the
+ * read verifiers.
+ *
+ * Try just the inode mapping lookup -- if it succeeds, then
+ * the inode buffer verifier failed and something needs fixing.
+ * Otherwise, we really couldn't find it so tell userspace
+ * that it no longer exists.
+ */
+ error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
+ XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
+ if (error)
+ return -ENOENT;
+ error = -EFSCORRUPTED;
+ /* fall through */
+ default:
trace_xfs_scrub_op_error(sc,
XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
@@ -572,3 +721,61 @@ out:
/* scrub teardown will unlock and release the inode for us */
return error;
}
+
+/*
+ * Predicate that decides if we need to evaluate the cross-reference check.
+ * If there was an error accessing the cross-reference btree, just delete
+ * the cursor and skip the check.
+ *
+ * Returns true only when *error is zero.  On a non-zero *error:
+ *  - if curpp is given and *curpp is already NULL, return false right
+ *    away; nothing is flagged or traced and *error is left as it was;
+ *  - if curpp is given and *curpp is set, the cursor is deleted with
+ *    XFS_BTREE_ERROR and *curpp is cleared;
+ *  - then (also when curpp itself is NULL) XFS_SCRUB_OFLAG_XFAIL is set,
+ *    the error is traced, *error is reset to 0 and false is returned.
+ */
+bool
+xfs_scrub_should_check_xref(
+ struct xfs_scrub_context *sc,
+ int *error,
+ struct xfs_btree_cur **curpp)
+{
+ if (*error == 0)
+ return true;
+
+ if (curpp) {
+ /* If we've already given up on xref, just bail out. */
+ if (!*curpp)
+ return false;
+
+ /* xref error, delete cursor and bail out. */
+ xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
+ *curpp = NULL;
+ }
+
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
+ trace_xfs_scrub_xref_error(sc, *error, __return_address);
+
+ /*
+ * Errors encountered during cross-referencing with another
+ * data structure should not cause this scrubber to abort.
+ */
+ *error = 0;
+ return false;
+}
+
+/*
+ * Run the structure verifiers on in-memory buffers to detect bad memory.
+ *
+ * A buffer with no b_ops is reported through
+ * xfs_scrub_block_set_corrupt(); a buffer whose ops have no verify_struct
+ * hook results in a call to xfs_scrub_set_incomplete() instead.
+ * Otherwise the hook is run, and a non-NULL failure address sets
+ * XFS_SCRUB_OFLAG_CORRUPT and is handed to the block-error tracepoint in
+ * the slot where other callers pass a return address.
+ */
+void
+xfs_scrub_buffer_recheck(
+ struct xfs_scrub_context *sc,
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ if (bp->b_ops == NULL) {
+ xfs_scrub_block_set_corrupt(sc, bp);
+ return;
+ }
+ if (bp->b_ops->verify_struct == NULL) {
+ xfs_scrub_set_incomplete(sc);
+ return;
+ }
+ fa = bp->b_ops->verify_struct(bp);
+ if (!fa)
+ return;
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ trace_xfs_scrub_block_error(sc, bp->b_bn, fa);
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 5c043855570e..ddb65d22c76a 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -56,6 +56,11 @@ bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
xfs_fileoff_t offset, int *error);
+bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno, xfs_agblock_t bno, int *error);
+bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc,
+ int whichfork, xfs_fileoff_t offset, int *error);
+
void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
struct xfs_buf *bp);
void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
@@ -68,6 +73,13 @@ void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
xfs_fileoff_t offset);
+void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc,
+ struct xfs_buf *bp);
+void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
+ struct xfs_buf *bp);
+void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
+ int whichfork, xfs_fileoff_t offset);
+
void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
struct xfs_buf *bp);
void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
@@ -76,10 +88,12 @@ void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc);
int xfs_scrub_checkpoint_log(struct xfs_mount *mp);
+/* Are we set up for a cross-referencing check? */
+bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error,
+ struct xfs_btree_cur **curpp);
+
/* Setup functions */
int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
-int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
- struct xfs_inode *ip);
int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc,
struct xfs_inode *ip);
int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
@@ -134,11 +148,16 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
void *),
void *priv);
+int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ struct xfs_owner_info *oinfo,
+ xfs_filblks_t *blocks);
int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc,
struct xfs_inode *ip, bool force_log);
int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc,
struct xfs_inode *ip, unsigned int resblks);
+void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp);
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index d94edd93cba8..bffdb7dc09bf 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -233,11 +233,28 @@ xfs_scrub_da_btree_write_verify(
return;
}
}
+static void *
+xfs_scrub_da_btree_verify(
+ struct xfs_buf *bp)
+{ /* picks real buf ops from the block magic, installs them in bp->b_ops, returns their verify_struct result */
+ struct xfs_da_blkinfo *info = bp->b_addr;
+
+ switch (be16_to_cpu(info->magic)) {
+ case XFS_DIR2_LEAF1_MAGIC:
+ case XFS_DIR3_LEAF1_MAGIC:
+ bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+ return bp->b_ops->verify_struct(bp);
+ default: /* any other magic is checked with the da3 node ops */
+ bp->b_ops = &xfs_da3_node_buf_ops;
+ return bp->b_ops->verify_struct(bp);
+ }
+}
static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
.name = "xfs_scrub_da_btree",
.verify_read = xfs_scrub_da_btree_read_verify,
.verify_write = xfs_scrub_da_btree_write_verify,
+ .verify_struct = xfs_scrub_da_btree_verify,
};
/* Check a block's sibling. */
@@ -276,6 +293,9 @@ xfs_scrub_da_btree_block_check_sibling(
xfs_scrub_da_set_corrupt(ds, level);
return error;
}
+ if (ds->state->altpath.blk[level].bp)
+ xfs_scrub_buffer_recheck(ds->sc,
+ ds->state->altpath.blk[level].bp);
/* Compare upper level pointer to sibling pointer. */
if (ds->state->altpath.blk[level].blkno != sibling)
@@ -358,6 +378,8 @@ xfs_scrub_da_btree_block(
&xfs_scrub_da_btree_buf_ops);
if (!xfs_scrub_da_process_error(ds, level, &error))
goto out_nobuf;
+ if (blk->bp)
+ xfs_scrub_buffer_recheck(ds->sc, blk->bp);
/*
* We didn't find a dir btree root block, which means that
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 69e1efdd4019..50b6a26b0299 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -92,7 +92,7 @@ xfs_scrub_dir_check_ftype(
* inodes can trigger immediate inactive cleanup of the inode.
*/
error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
- if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
+ if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
&error))
goto out;
@@ -200,6 +200,7 @@ xfs_scrub_dir_rec(
struct xfs_inode *dp = ds->dargs.dp;
struct xfs_dir2_data_entry *dent;
struct xfs_buf *bp;
+ char *p, *endp;
xfs_ino_t ino;
xfs_dablk_t rec_bno;
xfs_dir2_db_t db;
@@ -237,9 +238,37 @@ xfs_scrub_dir_rec(
xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
goto out;
}
+ xfs_scrub_buffer_recheck(ds->sc, bp);
- /* Retrieve the entry, sanity check it, and compare hashes. */
dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
+
+ /* Make sure we got a real directory entry. */
+ p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr);
+ endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
+ if (!endp) {
+ xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+ goto out_relse;
+ }
+ while (p < endp) {
+ struct xfs_dir2_data_entry *dep;
+ struct xfs_dir2_data_unused *dup;
+
+ dup = (struct xfs_dir2_data_unused *)p;
+ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+ p += be16_to_cpu(dup->length);
+ continue;
+ }
+ dep = (struct xfs_dir2_data_entry *)p;
+ if (dep == dent)
+ break;
+ p += mp->m_dir_inode_ops->data_entsize(dep->namelen);
+ }
+ if (p >= endp) {
+ xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+ goto out_relse;
+ }
+
+ /* Retrieve the entry, sanity check it, and compare hashes. */
ino = be64_to_cpu(dent->inumber);
hash = be32_to_cpu(ent->hashval);
tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
@@ -324,6 +353,7 @@ xfs_scrub_directory_data_bestfree(
}
if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, bp);
/* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
@@ -361,13 +391,7 @@ xfs_scrub_directory_data_bestfree(
/* Make sure the bestfrees are actually the best free spaces. */
ptr = (char *)d_ops->data_entry_p(bp->b_addr);
- if (is_block) {
- struct xfs_dir2_block_tail *btp;
-
- btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr);
- endptr = (char *)xfs_dir2_block_leaf_p(btp);
- } else
- endptr = (char *)bp->b_addr + BBTOB(bp->b_length);
+ endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
/* Iterate the entries, stopping when we hit or go past the end. */
while (ptr < endptr) {
@@ -474,6 +498,7 @@ xfs_scrub_directory_leaf1_bestfree(
error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, bp);
leaf = bp->b_addr;
d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
@@ -559,6 +584,7 @@ xfs_scrub_directory_free_bestfree(
error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, bp);
if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 496d6f2fbb9e..63ab3f98430d 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -58,6 +58,56 @@ xfs_scrub_setup_ag_iallocbt(
/* Inode btree scrubber. */
+/*
+ * If we're checking the finobt, cross-reference with the inobt.
+ * Otherwise we're checking the inobt; if there is an finobt, make sure
+ * we have a record or not depending on freecount.
+ *
+ * Does nothing when the other tree's cursor is NULL.  A lookup error is
+ * handed to xfs_scrub_should_check_xref(), which may delete that cursor.
+ * An xref corruption is recorded when ir_freecount is non-zero but the
+ * other tree has no record at agino, or when ir_freecount is zero and
+ * the other tree does have one.
+ */
+static inline void
+xfs_scrub_iallocbt_chunk_xref_other(
+ struct xfs_scrub_context *sc,
+ struct xfs_inobt_rec_incore *irec,
+ xfs_agino_t agino)
+{
+ struct xfs_btree_cur **pcur;
+ bool has_irec;
+ int error;
+
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
+ pcur = &sc->sa.ino_cur;
+ else
+ pcur = &sc->sa.fino_cur;
+ if (!(*pcur))
+ return;
+ error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec);
+ if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+ return;
+ if (((irec->ir_freecount > 0 && !has_irec) ||
+ (irec->ir_freecount == 0 && has_irec)))
+ xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+}
+
+/*
+ * Cross-reference with the other btrees.
+ *
+ * Skipped entirely once XFS_SCRUB_OFLAG_CORRUPT has been set.  Otherwise
+ * the extent [agbno, agbno + len) is checked as used space, the record is
+ * compared against the other inode btree, and the extent is checked as
+ * owned by XFS_RMAP_OWN_INODES and as not shared.
+ */
+STATIC void
+xfs_scrub_iallocbt_chunk_xref(
+ struct xfs_scrub_context *sc,
+ struct xfs_inobt_rec_incore *irec,
+ xfs_agino_t agino,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ struct xfs_owner_info oinfo;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, len);
+ xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, len);
+}
+
/* Is this chunk worth checking? */
STATIC bool
xfs_scrub_iallocbt_chunk(
@@ -76,6 +126,8 @@ xfs_scrub_iallocbt_chunk(
!xfs_verify_agbno(mp, agno, bno + len - 1))
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
+
return true;
}
@@ -190,8 +242,14 @@ xfs_scrub_iallocbt_check_freemask(
}
/* If any part of this is a hole, skip it. */
- if (ir_holemask)
+ if (ir_holemask) {
+ xfs_scrub_xref_is_not_owned_by(bs->sc, agbno,
+ blks_per_cluster, &oinfo);
continue;
+ }
+
+ xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster,
+ &oinfo);
/* Grab the inode cluster buffer. */
imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
@@ -227,6 +285,7 @@ xfs_scrub_iallocbt_rec(
union xfs_btree_rec *rec)
{
struct xfs_mount *mp = bs->cur->bc_mp;
+ xfs_filblks_t *inode_blocks = bs->private;
struct xfs_inobt_rec_incore irec;
uint64_t holes;
xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
@@ -264,6 +323,9 @@ xfs_scrub_iallocbt_rec(
(agbno & (xfs_icluster_size_fsb(mp) - 1)))
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ *inode_blocks += XFS_B_TO_FSB(mp,
+ irec.ir_count * mp->m_sb.sb_inodesize);
+
/* Handle non-sparse inodes */
if (!xfs_inobt_issparse(irec.ir_holemask)) {
len = XFS_B_TO_FSB(mp,
@@ -308,6 +370,72 @@ out:
return error;
}
+/*
+ * Make sure the inode btrees are as large as the rmap thinks they are.
+ * Don't bother if we're missing btree cursors, as we're already corrupt.
+ *
+ * "Missing" here means no inobt cursor, no rmapbt cursor, or no finobt
+ * cursor on a filesystem that has the finobt feature.  The block counts
+ * of the inobt (and finobt, if its cursor exists) are summed and compared
+ * with the number of blocks the rmapbt records for XFS_RMAP_OWN_INOBT; a
+ * mismatch is recorded against the inobt cursor.  The 'which' argument is
+ * not referenced in the body.
+ */
+STATIC void
+xfs_scrub_iallocbt_xref_rmap_btreeblks(
+ struct xfs_scrub_context *sc,
+ int which)
+{
+ struct xfs_owner_info oinfo;
+ xfs_filblks_t blocks;
+ xfs_extlen_t inobt_blocks = 0;
+ xfs_extlen_t finobt_blocks = 0;
+ int error;
+
+ if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
+ (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur))
+ return;
+
+ /* Check that we saw as many inobt blocks as the rmap says. */
+ error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks);
+ if (!xfs_scrub_process_error(sc, 0, 0, &error))
+ return;
+
+ if (sc->sa.fino_cur) {
+ error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks);
+ if (!xfs_scrub_process_error(sc, 0, 0, &error))
+ return;
+ }
+
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
+ error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
+ &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != inobt_blocks + finobt_blocks)
+ xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+}
+
+/*
+ * Make sure that the inobt records point to the same number of blocks as
+ * the rmap says are owned by inodes.
+ *
+ * Does nothing without an rmapbt cursor.  inode_blocks is the caller's
+ * tally; it is compared with the rmapbt's block count for
+ * XFS_RMAP_OWN_INODES and a mismatch is recorded against the inobt
+ * cursor.  The 'which' argument is not referenced in the body.
+ */
+STATIC void
+xfs_scrub_iallocbt_xref_rmap_inodes(
+ struct xfs_scrub_context *sc,
+ int which,
+ xfs_filblks_t inode_blocks)
+{
+ struct xfs_owner_info oinfo;
+ xfs_filblks_t blocks;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ /* Check that we saw as many inode blocks as the rmap knows about. */
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
+ &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != inode_blocks)
+ xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+}
+
/* Scrub the inode btrees for some AG. */
STATIC int
xfs_scrub_iallocbt(
@@ -316,10 +444,29 @@ xfs_scrub_iallocbt(
{
struct xfs_btree_cur *cur;
struct xfs_owner_info oinfo;
+ xfs_filblks_t inode_blocks = 0;
+ int error;
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
- return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL);
+ error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo,
+ &inode_blocks);
+ if (error)
+ return error;
+
+ xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which);
+
+ /*
+ * If we're scrubbing the inode btree, inode_blocks is the number of
+ * blocks pointed to by all the inode chunk records. Therefore, we
+ * should compare to the number of inode chunk blocks that the rmap
+ * knows about. We can't do this for the finobt since it only points
+ * to inode chunks with free inodes.
+ */
+ if (which == XFS_BTNUM_INO)
+ xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks);
+
+ return error;
}
int
@@ -335,3 +482,46 @@ xfs_scrub_finobt(
{
return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
}
+
+/*
+ * See if an inode btree has (or doesn't have) an inode chunk record.
+ *
+ * Does nothing when *icur is NULL.  The extent [agbno, agbno + len) is
+ * looked up with xfs_ialloc_has_inodes_at_extent(); a lookup error goes
+ * through xfs_scrub_should_check_xref(), and an answer that differs from
+ * should_have_inodes is recorded as an xref corruption on *icur.
+ */
+static inline void
+xfs_scrub_xref_inode_check(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ struct xfs_btree_cur **icur,
+ bool should_have_inodes)
+{
+ bool has_inodes;
+ int error;
+
+ if (!(*icur))
+ return;
+
+ error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes);
+ if (!xfs_scrub_should_check_xref(sc, &error, icur))
+ return;
+ if (has_inodes != should_have_inodes)
+ xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0);
+}
+
+/*
+ * xref check that the extent is not covered by inodes
+ *
+ * Both the inobt and the finobt cursors in sc->sa are consulted.
+ */
+void
+xfs_scrub_xref_is_not_inode_chunk(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false);
+ xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false);
+}
+
+/*
+ * xref check that the extent is covered by inodes
+ *
+ * Only the inobt cursor in sc->sa is consulted here.
+ */
+void
+xfs_scrub_xref_is_inode_chunk(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true);
+}
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index f120fb20452f..21297bef8df1 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -36,9 +36,13 @@
#include "xfs_ialloc.h"
#include "xfs_da_format.h"
#include "xfs_reflink.h"
+#include "xfs_rmap.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
+#include "scrub/btree.h"
#include "scrub/trace.h"
/*
@@ -64,7 +68,7 @@ xfs_scrub_setup_inode(
break;
case -EFSCORRUPTED:
case -EFSBADCRC:
- return 0;
+ return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
default:
return error;
}
@@ -392,6 +396,14 @@ xfs_scrub_dinode(
break;
}
+ /* di_[amc]time.nsec */
+ if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
/*
* di_size. xfs_dinode_verify checks for things that screw up
* the VFS such as the upper bit being set and zero-length
@@ -495,6 +507,8 @@ xfs_scrub_dinode(
}
if (dip->di_version >= 3) {
+ if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
flags2);
@@ -546,7 +560,7 @@ xfs_scrub_inode_map_raw(
*/
bp->b_ops = &xfs_inode_buf_ops;
dip = xfs_buf_offset(bp, imap.im_boffset);
- if (!xfs_dinode_verify(mp, ino, dip) ||
+ if (xfs_dinode_verify(mp, ino, dip) != NULL ||
!xfs_dinode_good_version(mp, dip->di_version)) {
xfs_scrub_ino_set_corrupt(sc, ino, bp);
goto out_buf;
@@ -567,18 +581,155 @@ out_buf:
return error;
}
+/*
+ * Make sure the finobt doesn't think this inode is free.
+ * We don't have to check the inobt ourselves because we got the inode via
+ * IGET_UNTRUSTED, which checks the inobt for us.
+ *
+ * Does nothing without a finobt cursor.  The lookup is XFS_LOOKUP_LE on
+ * the inode's AG-relative number; errors from the lookup or the record
+ * fetch go through xfs_scrub_should_check_xref().  An xref corruption is
+ * recorded only when the record found spans this inode and the inode's
+ * bit is set in ir_free.
+ */
+static void
+xfs_scrub_inode_xref_finobt(
+ struct xfs_scrub_context *sc,
+ xfs_ino_t ino)
+{
+ struct xfs_inobt_rec_incore rec;
+ xfs_agino_t agino;
+ int has_record;
+ int error;
+
+ if (!sc->sa.fino_cur)
+ return;
+
+ agino = XFS_INO_TO_AGINO(sc->mp, ino);
+
+ /*
+ * Try to get the finobt record. If we can't get it, then we're
+ * in good shape.
+ */
+ error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
+ &has_record);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
+ !has_record)
+ return;
+
+ error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
+ !has_record)
+ return;
+
+ /*
+ * Otherwise, make sure this record either doesn't cover this inode,
+ * or that it does but it's marked present.
+ */
+ if (rec.ir_startino > agino ||
+ rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
+ return;
+
+ if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
+}
+
+/*
+ * Cross reference the inode fields with the forks.
+ *
+ * Counts extents and blocks in the data fork and then the attr fork with
+ * xfs_bmap_count_blocks(); an error from either call ends the check via
+ * xfs_scrub_should_check_xref() (no cursor is passed).  Note that the
+ * data fork is flagged only when the counted extents are fewer than
+ * di_nextents, whereas the attr fork count must equal di_anextents
+ * exactly.  The two block counts together must equal di_nblocks.
+ */
+STATIC void
+xfs_scrub_inode_xref_bmap(
+ struct xfs_scrub_context *sc,
+ struct xfs_dinode *dip)
+{
+ xfs_extnum_t nextents;
+ xfs_filblks_t count;
+ xfs_filblks_t acount;
+ int error;
+
+ /* Walk all the extents to check nextents/naextents/nblocks. */
+ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
+ &nextents, &count);
+ if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+ return;
+ if (nextents < be32_to_cpu(dip->di_nextents))
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+
+ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
+ &nextents, &acount);
+ if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+ return;
+ if (nextents != be16_to_cpu(dip->di_anextents))
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+
+ /* Check nblocks against the inode. */
+ if (count + acount != be64_to_cpu(dip->di_nblocks))
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+}
+
+/*
+ * Cross-reference with the other btrees.
+ *
+ * Skipped entirely once XFS_SCRUB_OFLAG_CORRUPT has been set.  sc->sa is
+ * set up for the inode's AG with xfs_scrub_ag_init() (a failure there is
+ * handled by xfs_scrub_xref_process_error() and ends the check) and is
+ * released with xfs_scrub_ag_free() after the checks have run.  The
+ * checks treat the inode's block as a one-block extent: used space,
+ * finobt state, XFS_RMAP_OWN_INODES ownership, not shared, and finally
+ * the fork counters in dip.
+ */
+STATIC void
+xfs_scrub_inode_xref(
+ struct xfs_scrub_context *sc,
+ xfs_ino_t ino,
+ struct xfs_dinode *dip)
+{
+ struct xfs_owner_info oinfo;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agno = XFS_INO_TO_AGNO(sc->mp, ino);
+ agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
+
+ error = xfs_scrub_ag_init(sc, agno, &sc->sa);
+ if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_inode_xref_finobt(sc, ino);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+ xfs_scrub_inode_xref_bmap(sc, dip);
+
+ xfs_scrub_ag_free(sc, &sc->sa);
+}
+
+/*
+ * If the reflink iflag disagrees with a scan for shared data fork extents,
+ * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
+ * any shared extents). We already checked for reflink iflag set on a non
+ * reflink filesystem.
+ *
+ * Does nothing on a filesystem without the reflink feature.  An error
+ * from xfs_reflink_inode_has_shared_extents() is passed to
+ * xfs_scrub_xref_process_error(), and the comparison is skipped when
+ * that helper returns false.
+ */
+static void
+xfs_scrub_inode_check_reflink_iflag(
+ struct xfs_scrub_context *sc,
+ xfs_ino_t ino,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = sc->mp;
+ bool has_shared;
+ int error;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return;
+
+ error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
+ &has_shared);
+ if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
+ XFS_INO_TO_AGBNO(mp, ino), &error))
+ return;
+ if (xfs_is_reflink_inode(sc->ip) && !has_shared)
+ xfs_scrub_ino_set_preen(sc, ino, bp);
+ else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
+}
+
/* Scrub an inode. */
int
xfs_scrub_inode(
struct xfs_scrub_context *sc)
{
struct xfs_dinode di;
- struct xfs_mount *mp = sc->mp;
struct xfs_buf *bp = NULL;
struct xfs_dinode *dip;
xfs_ino_t ino;
-
- bool has_shared;
int error = 0;
/* Did we get the in-core inode, or are we doing this manually? */
@@ -603,19 +754,14 @@ xfs_scrub_inode(
goto out;
/*
- * Does this inode have the reflink flag set but no shared extents?
- * Set the preening flag if this is the case.
+ * Look for discrepancies between file's data blocks and the reflink
+ * iflag. We already checked the iflag against the file mode when
+ * we scrubbed the dinode.
*/
- if (xfs_is_reflink_inode(sc->ip)) {
- error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
- &has_shared);
- if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
- XFS_INO_TO_AGBNO(mp, ino), &error))
- goto out;
- if (!has_shared)
- xfs_scrub_ino_set_preen(sc, ino, bp);
- }
+ if (S_ISREG(VFS_I(sc->ip)->i_mode))
+ xfs_scrub_inode_check_reflink_iflag(sc, ino, bp);
+ xfs_scrub_inode_xref(sc, ino, dip);
out:
if (bp)
xfs_trans_brelse(sc->tp, bp);
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 63a25334fc83..0d3851410c74 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -169,9 +169,9 @@ xfs_scrub_parent_validate(
* immediate inactive cleanup of the inode.
*/
error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
- if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+ if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out;
- if (dp == sc->ip) {
+ if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
goto out_rele;
}
@@ -185,7 +185,7 @@ xfs_scrub_parent_validate(
*/
if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
- if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
+ if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
&error))
goto out_unlock;
if (nlink != expected_nlink)
@@ -205,7 +205,7 @@ xfs_scrub_parent_validate(
/* Go looking for our dentry. */
error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
- if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+ if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out_unlock;
/* Drop the parent lock, relock this inode. */
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 3d9037eceaf1..51daa4ae2627 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -67,13 +67,6 @@ xfs_scrub_setup_quota(
{
uint dqtype;
- /*
- * If userspace gave us an AG number or inode data, they don't
- * know what they're doing. Get out.
- */
- if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
- return -EINVAL;
-
dqtype = xfs_scrub_quota_to_dqtype(sc);
if (dqtype == 0)
return -EINVAL;
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 2f88a8d44bd0..400f1561cd3d 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -31,6 +31,7 @@
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
+#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -50,6 +51,307 @@ xfs_scrub_setup_ag_refcountbt(
/* Reference count btree scrubber. */
+/*
+ * Confirming Reference Counts via Reverse Mappings
+ *
+ * We want to count the reverse mappings overlapping a refcount record
+ * (bno, len, refcount), allowing for the possibility that some of the
+ * overlap may come from smaller adjoining reverse mappings, while some
+ * comes from single extents which overlap the range entirely. The
+ * outer loop is as follows:
+ *
+ * 1. For all reverse mappings overlapping the refcount extent,
+ * a. If a given rmap completely overlaps, mark it as seen.
+ * b. Otherwise, record the fragment (in agbno order) for later
+ * processing.
+ *
+ * Once we've seen all the rmaps, we know that for all blocks in the
+ * refcount record we want to find $refcount owners and we've already
+ * visited $seen extents that overlap all the blocks. Therefore, we
+ * need to find ($refcount - $seen) owners for every block in the
+ * extent; call that quantity $target_nr. Proceed as follows:
+ *
+ * 2. Pull the first $target_nr fragments from the list; all of them
+ * should start at or before the start of the extent.
+ * Call this subset of fragments the working set.
+ * 3. Until there are no more unprocessed fragments,
+ * a. Find the shortest fragments in the set and remove them.
+ * b. Note the block number of the end of these fragments.
+ * c. Pull the same number of fragments from the list. All of these
+ * fragments should start at the block number recorded in the
+ * previous step.
+ * d. Put those fragments in the set.
+ * 4. Check that there are $target_nr fragments remaining in the working
+ * set, and that they all end at or beyond the end of the refcount extent.
+ *
+ * If the refcount is correct, all the check conditions in the algorithm
+ * should always hold true. If not, the refcount is incorrect.
+ */
+struct xfs_scrub_refcnt_frag {
+ struct list_head list;
+ struct xfs_rmap_irec rm;
+};
+
+struct xfs_scrub_refcnt_check {
+ struct xfs_scrub_context *sc;
+ struct list_head fragments;
+
+ /* refcount extent we're examining */
+ xfs_agblock_t bno;
+ xfs_extlen_t len;
+ xfs_nlink_t refcount;
+
+ /* number of owners seen */
+ xfs_nlink_t seen;
+};
+
+/*
+ * Decide if the given rmap is large enough that we can redeem it
+ * towards refcount verification now, or if it's a fragment, in
+ * which case we'll hang onto it in the hopes that we'll later
+ * discover that we've collected exactly the correct number of
+ * fragments as the refcountbt says we should have.
+ */
+STATIC int
+xfs_scrub_refcountbt_rmap_check(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_scrub_refcnt_check *refchk = priv;
+ struct xfs_scrub_refcnt_frag *frag;
+ xfs_agblock_t rm_last;
+ xfs_agblock_t rc_last;
+ int error = 0;
+
+ if (xfs_scrub_should_terminate(refchk->sc, &error))
+ return error;
+
+ rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
+ rc_last = refchk->bno + refchk->len - 1;
+
+ /* Confirm that a single-owner refc extent is a CoW stage. */
+ if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
+ xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0);
+ return 0;
+ }
+
+ if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
+ /*
+ * The rmap covers the entire refcount record, so we can
+ * confirm one refcount owner seen.
+ */
+ refchk->seen++;
+ } else {
+ /*
+ * This rmap covers only part of the refcount record, so
+ * save the fragment for later processing. If the rmapbt
+ * is healthy each rmap_irec we see will be in agbno order
+ * so we don't need insertion sort here.
+ */
+ frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
+ KM_MAYFAIL | KM_NOFS);
+ if (!frag)
+ return -ENOMEM;
+ memcpy(&frag->rm, rec, sizeof(frag->rm));
+ list_add_tail(&frag->list, &refchk->fragments);
+ }
+
+ return 0;
+}
+
+/*
+ * Given a bunch of rmap fragments, iterate through them, keeping
+ * a running tally of the refcount. If this ever deviates from
+ * what we expect (which is the refcountbt's refcount minus the
+ * number of extents that totally covered the refcountbt extent),
+ * we have a refcountbt error.
+ */
+STATIC void
+xfs_scrub_refcountbt_process_rmap_fragments(
+ struct xfs_scrub_refcnt_check *refchk)
+{
+ struct list_head worklist;
+ struct xfs_scrub_refcnt_frag *frag;
+ struct xfs_scrub_refcnt_frag *n;
+ xfs_agblock_t bno;
+ xfs_agblock_t rbno;
+ xfs_agblock_t next_rbno;
+ xfs_nlink_t nr;
+ xfs_nlink_t target_nr;
+
+ target_nr = refchk->refcount - refchk->seen;
+ if (target_nr == 0)
+ return;
+
+ /*
+ * There are (refchk->refcount - refchk->seen) references we
+ * haven't found yet. Pull that many off the fragment list and
+ * figure out where the smallest rmap ends (and therefore the
+ * next rmap should start). All the rmaps we pull off should
+ * start at or before the beginning of the refcount record's
+ * range. nr counts the fragments actually moved to the worklist.
+ */
+ INIT_LIST_HEAD(&worklist);
+ rbno = NULLAGBLOCK;
+ nr = 0;
+
+ /* Make sure the fragments actually /are/ in agbno order. */
+ bno = 0;
+ list_for_each_entry(frag, &refchk->fragments, list) {
+ if (frag->rm.rm_startblock < bno)
+ goto done;
+ bno = frag->rm.rm_startblock;
+ }
+
+ /*
+ * Find all the rmaps that start at or before the refc extent,
+ * and put them on the worklist.
+ */
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ if (frag->rm.rm_startblock > refchk->bno)
+ goto done;
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (bno < rbno)
+ rbno = bno;
+ list_move_tail(&frag->list, &worklist);
+ nr++;
+ if (nr == target_nr)
+ break;
+ }
+
+ /*
+ * We should have found exactly $target_nr rmap fragments starting
+ * at or before the refcount extent; running out early is an error.
+ */
+ if (nr != target_nr)
+ goto done;
+
+ while (!list_empty(&refchk->fragments)) {
+ /* Discard any fragments ending at rbno from the worklist. */
+ nr = 0;
+ next_rbno = NULLAGBLOCK;
+ list_for_each_entry_safe(frag, n, &worklist, list) {
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (bno != rbno) {
+ if (bno < next_rbno)
+ next_rbno = bno;
+ continue;
+ }
+ list_del(&frag->list);
+ kmem_free(frag);
+ nr++;
+ }
+
+ /* Try to add nr rmaps starting at rbno to the worklist. */
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (frag->rm.rm_startblock != rbno)
+ goto done;
+ list_move_tail(&frag->list, &worklist);
+ if (next_rbno > bno)
+ next_rbno = bno;
+ nr--;
+ if (nr == 0)
+ break;
+ }
+
+ /*
+ * If we get here and nr > 0, this means that we added fewer
+ * items to the worklist than we discarded because the fragment
+ * list ran out of items. Therefore, we cannot maintain the
+ * required refcount. Something is wrong, so we're done.
+ */
+ if (nr)
+ goto done;
+
+ rbno = next_rbno;
+ }
+
+ /*
+ * Make sure the last extent we processed ends at or beyond
+ * the end of the refcount extent.
+ */
+ if (rbno < refchk->bno + refchk->len)
+ goto done;
+
+ /* Actually record us having seen the remaining refcount. */
+ refchk->seen = refchk->refcount;
+done:
+ /* Delete fragments and work list. */
+ list_for_each_entry_safe(frag, n, &worklist, list) {
+ list_del(&frag->list);
+ kmem_free(frag);
+ }
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ list_del(&frag->list);
+ kmem_free(frag);
+ }
+}
+
+/* Use the rmap entries covering this extent to verify the refcount. */
+STATIC void
+xfs_scrub_refcountbt_xref_rmap(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ xfs_nlink_t refcount)
+{
+ struct xfs_scrub_refcnt_check refchk = {
+ .sc = sc,
+ .bno = bno,
+ .len = len,
+ .refcount = refcount,
+ .seen = 0,
+ };
+ struct xfs_rmap_irec low;
+ struct xfs_rmap_irec high;
+ struct xfs_scrub_refcnt_frag *frag;
+ struct xfs_scrub_refcnt_frag *n;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ /* Cross-reference with the rmapbt to confirm the refcount. */
+ memset(&low, 0, sizeof(low));
+ low.rm_startblock = bno;
+ memset(&high, 0xFF, sizeof(high));
+ high.rm_startblock = bno + len - 1;
+
+ INIT_LIST_HEAD(&refchk.fragments);
+ error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
+ &xfs_scrub_refcountbt_rmap_check, &refchk);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ goto out_free;
+
+ xfs_scrub_refcountbt_process_rmap_fragments(&refchk);
+ if (refcount != refchk.seen)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+out_free:
+ list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
+ list_del(&frag->list);
+ kmem_free(frag);
+ }
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_refcountbt_xref(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ xfs_nlink_t refcount)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, len);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+ xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount);
+}
+
/* Scrub a refcountbt record. */
STATIC int
xfs_scrub_refcountbt_rec(
@@ -57,6 +359,7 @@ xfs_scrub_refcountbt_rec(
union xfs_btree_rec *rec)
{
struct xfs_mount *mp = bs->cur->bc_mp;
+ xfs_agblock_t *cow_blocks = bs->private;
xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
xfs_agblock_t bno;
xfs_extlen_t len;
@@ -72,6 +375,8 @@ xfs_scrub_refcountbt_rec(
has_cowflag = (bno & XFS_REFC_COW_START);
if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ if (has_cowflag)
+ (*cow_blocks) += len;
/* Check the extent. */
bno &= ~XFS_REFC_COW_START;
@@ -83,17 +388,128 @@ xfs_scrub_refcountbt_rec(
if (refcount == 0)
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount);
+
return error;
}
+/* Make sure we have as many refc blocks as the rmap says. */
+STATIC void
+xfs_scrub_refcount_xref_rmap(
+ struct xfs_scrub_context *sc,
+ struct xfs_owner_info *oinfo,
+ xfs_filblks_t cow_blocks)
+{
+ xfs_extlen_t refcbt_blocks = 0;
+ xfs_filblks_t blocks;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ /* Check that we saw as many refcbt blocks as the rmap knows about. */
+ error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
+ if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
+ return;
+ error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+ &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != refcbt_blocks)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+ /* Check that we saw as many cow blocks as the rmap knows about. */
+ xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
+ error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+ &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != cow_blocks)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
+
/* Scrub the refcount btree for some AG. */
int
xfs_scrub_refcountbt(
struct xfs_scrub_context *sc)
{
struct xfs_owner_info oinfo;
+ xfs_agblock_t cow_blocks = 0;
+ int error;
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
- return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
- &oinfo, NULL);
+ error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
+ &oinfo, &cow_blocks);
+ if (error)
+ return error;
+
+ xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);
+
+ return 0;
+}
+
+/* xref check that a cow staging extent is marked in the refcountbt. */
+void
+xfs_scrub_xref_is_cow_staging(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ struct xfs_refcount_irec rc;
+ bool has_cowflag;
+ int has_refcount;
+ int error;
+
+ if (!sc->sa.refc_cur)
+ return;
+
+ /* Find the CoW staging extent. */
+ error = xfs_refcount_lookup_le(sc->sa.refc_cur,
+ agbno + XFS_REFC_COW_START, &has_refcount);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (!has_refcount) {
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+ return;
+ }
+
+ error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (!has_refcount) {
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+ return;
+ }
+
+ /* CoW flag must be set, refcount must be 1. */
+ has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
+ if (!has_cowflag || rc.rc_refcount != 1)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+
+ /* Must be at least as long as what was passed in */
+ if (rc.rc_blockcount < len)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+}
+
+/*
+ * xref check that the extent is not shared. Only file data blocks
+ * can have multiple owners.
+ */
+void
+xfs_scrub_xref_is_not_shared(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ bool shared;
+ int error;
+
+ if (!sc->sa.refc_cur)
+ return;
+
+ error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (shared)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 97846c424690..8f2a7c3ff455 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -32,6 +32,7 @@
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
+#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -51,6 +52,61 @@ xfs_scrub_setup_ag_rmapbt(
/* Reverse-mapping scrubber. */
+/* Cross-reference a rmap against the refcount btree. */
+STATIC void
+xfs_scrub_rmapbt_xref_refc(
+ struct xfs_scrub_context *sc,
+ struct xfs_rmap_irec *irec)
+{
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ bool non_inode;
+ bool is_bmbt;
+ bool is_attr;
+ bool is_unwritten;
+ int error;
+
+ if (!sc->sa.refc_cur)
+ return;
+
+ non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
+ is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
+ is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
+ is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;
+
+ /* If this is shared, must be a data fork extent. */
+ error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock,
+ irec->rm_blockcount, &fbno, &flen, false);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten))
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_rmapbt_xref(
+ struct xfs_scrub_context *sc,
+ struct xfs_rmap_irec *irec)
+{
+ xfs_agblock_t agbno = irec->rm_startblock;
+ xfs_extlen_t len = irec->rm_blockcount;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, len);
+ if (irec->rm_owner == XFS_RMAP_OWN_INODES)
+ xfs_scrub_xref_is_inode_chunk(sc, agbno, len);
+ else
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+ if (irec->rm_owner == XFS_RMAP_OWN_COW)
+ xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock,
+ irec->rm_blockcount);
+ else
+ xfs_scrub_rmapbt_xref_refc(sc, irec);
+}
+
/* Scrub an rmapbt record. */
STATIC int
xfs_scrub_rmapbt_rec(
@@ -121,6 +177,8 @@ xfs_scrub_rmapbt_rec(
irec.rm_owner > XFS_RMAP_OWN_FS)
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
}
+
+ xfs_scrub_rmapbt_xref(bs->sc, &irec);
out:
return error;
}
@@ -136,3 +194,68 @@ xfs_scrub_rmapbt(
return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec,
&oinfo, NULL);
}
+
+/* xref check that the extent's ownership by a given owner is as expected */
+static inline void
+xfs_scrub_xref_check_owner(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ struct xfs_owner_info *oinfo,
+ bool should_have_rmap)
+{
+ bool has_rmap;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo,
+ &has_rmap);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (has_rmap != should_have_rmap)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
+
+/* xref check that the extent is owned by a given owner */
+void
+xfs_scrub_xref_is_owned_by(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ struct xfs_owner_info *oinfo)
+{
+ xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true);
+}
+
+/* xref check that the extent is not owned by a given owner */
+void
+xfs_scrub_xref_is_not_owned_by(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ struct xfs_owner_info *oinfo)
+{
+ xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false);
+}
+
+/* xref check that the extent has no reverse mapping at all */
+void
+xfs_scrub_xref_has_no_owner(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len)
+{
+ bool has_rmap;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (has_rmap)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index c6fedb698008..26390991369a 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -43,22 +43,14 @@ xfs_scrub_setup_rt(
struct xfs_scrub_context *sc,
struct xfs_inode *ip)
{
- struct xfs_mount *mp = sc->mp;
- int error = 0;
-
- /*
- * If userspace gave us an AG number or inode data, they don't
- * know what they're doing. Get out.
- */
- if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
- return -EINVAL;
+ int error;
error = xfs_scrub_setup_fs(sc, ip);
if (error)
return error;
sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP;
- sc->ip = mp->m_rbmip;
+ sc->ip = sc->mp->m_rbmip;
xfs_ilock(sc->ip, sc->ilock_flags);
return 0;
@@ -106,3 +98,26 @@ xfs_scrub_rtsummary(
/* XXX: implement this some day */
return -ENOENT;
}
+
+
+/* xref check that the extent is not free in the rtbitmap */
+void
+xfs_scrub_xref_is_used_rt_space(
+ struct xfs_scrub_context *sc,
+ xfs_rtblock_t fsbno,
+ xfs_extlen_t len)
+{
+ bool is_free;
+ int error;
+
+ xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+ error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len,
+ &is_free);
+ if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+ goto out_unlock;
+ if (is_free)
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino,
+ NULL);
+out_unlock:
+ xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index ab3aef2ae823..26c75967a072 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -110,6 +110,16 @@
* structure itself is corrupt, the CORRUPT flag will be set. If
* the metadata is correct but otherwise suboptimal, the PREEN flag
* will be set.
+ *
+ * We perform secondary validation of filesystem metadata by
+ * cross-referencing every record with all other available metadata.
+ * For example, for block mapping extents, we verify that there are no
+ * records in the free space and inode btrees corresponding to that
+ * space extent and that there is a corresponding entry in the reverse
+ * mapping btree. Inconsistent metadata is noted by setting the
+ * XCORRUPT flag; btree query function errors are noted by setting the
+ * XFAIL flag and deleting the cursor to prevent further attempts to
+ * cross-reference with a defective btree.
*/
/*
@@ -128,8 +138,6 @@ xfs_scrub_probe(
{
int error = 0;
- if (sc->sm->sm_ino || sc->sm->sm_agno)
- return -EINVAL;
if (xfs_scrub_should_terminate(sc, &error))
return error;
@@ -151,7 +159,8 @@ xfs_scrub_teardown(
sc->tp = NULL;
}
if (sc->ip) {
- xfs_iunlock(sc->ip, sc->ilock_flags);
+ if (sc->ilock_flags)
+ xfs_iunlock(sc->ip, sc->ilock_flags);
if (sc->ip != ip_in &&
!xfs_internal_inum(sc->mp, sc->ip->i_ino))
iput(VFS_I(sc->ip));
@@ -167,106 +176,130 @@ xfs_scrub_teardown(
/* Scrubbing dispatch. */
static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
- { /* ioctl presence test */
+ [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */
+ .type = ST_NONE,
.setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_probe,
},
- { /* superblock */
- .setup = xfs_scrub_setup_ag_header,
+ [XFS_SCRUB_TYPE_SB] = { /* superblock */
+ .type = ST_PERAG,
+ .setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_superblock,
},
- { /* agf */
- .setup = xfs_scrub_setup_ag_header,
+ [XFS_SCRUB_TYPE_AGF] = { /* agf */
+ .type = ST_PERAG,
+ .setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_agf,
},
- { /* agfl */
- .setup = xfs_scrub_setup_ag_header,
+ [XFS_SCRUB_TYPE_AGFL]= { /* agfl */
+ .type = ST_PERAG,
+ .setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_agfl,
},
- { /* agi */
- .setup = xfs_scrub_setup_ag_header,
+ [XFS_SCRUB_TYPE_AGI] = { /* agi */
+ .type = ST_PERAG,
+ .setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_agi,
},
- { /* bnobt */
+ [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_allocbt,
.scrub = xfs_scrub_bnobt,
},
- { /* cntbt */
+ [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_allocbt,
.scrub = xfs_scrub_cntbt,
},
- { /* inobt */
+ [XFS_SCRUB_TYPE_INOBT] = { /* inobt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_iallocbt,
.scrub = xfs_scrub_inobt,
},
- { /* finobt */
+ [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_iallocbt,
.scrub = xfs_scrub_finobt,
.has = xfs_sb_version_hasfinobt,
},
- { /* rmapbt */
+ [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_rmapbt,
.scrub = xfs_scrub_rmapbt,
.has = xfs_sb_version_hasrmapbt,
},
- { /* refcountbt */
+ [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_refcountbt,
.scrub = xfs_scrub_refcountbt,
.has = xfs_sb_version_hasreflink,
},
- { /* inode record */
+ [XFS_SCRUB_TYPE_INODE] = { /* inode record */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_inode,
.scrub = xfs_scrub_inode,
},
- { /* inode data fork */
+ [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_inode_bmap,
.scrub = xfs_scrub_bmap_data,
},
- { /* inode attr fork */
+ [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_inode_bmap,
.scrub = xfs_scrub_bmap_attr,
},
- { /* inode CoW fork */
+ [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_inode_bmap,
.scrub = xfs_scrub_bmap_cow,
},
- { /* directory */
+ [XFS_SCRUB_TYPE_DIR] = { /* directory */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_directory,
.scrub = xfs_scrub_directory,
},
- { /* extended attributes */
+ [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_xattr,
.scrub = xfs_scrub_xattr,
},
- { /* symbolic link */
+ [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_symlink,
.scrub = xfs_scrub_symlink,
},
- { /* parent pointers */
+ [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_parent,
.scrub = xfs_scrub_parent,
},
- { /* realtime bitmap */
+ [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
+ .type = ST_FS,
.setup = xfs_scrub_setup_rt,
.scrub = xfs_scrub_rtbitmap,
.has = xfs_sb_version_hasrealtime,
},
- { /* realtime summary */
+ [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
+ .type = ST_FS,
.setup = xfs_scrub_setup_rt,
.scrub = xfs_scrub_rtsummary,
.has = xfs_sb_version_hasrealtime,
},
- { /* user quota */
- .setup = xfs_scrub_setup_quota,
- .scrub = xfs_scrub_quota,
+ [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
+ .type = ST_FS,
+ .setup = xfs_scrub_setup_quota,
+ .scrub = xfs_scrub_quota,
},
- { /* group quota */
- .setup = xfs_scrub_setup_quota,
- .scrub = xfs_scrub_quota,
+ [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */
+ .type = ST_FS,
+ .setup = xfs_scrub_setup_quota,
+ .scrub = xfs_scrub_quota,
},
- { /* project quota */
- .setup = xfs_scrub_setup_quota,
- .scrub = xfs_scrub_quota,
+ [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */
+ .type = ST_FS,
+ .setup = xfs_scrub_setup_quota,
+ .scrub = xfs_scrub_quota,
},
};
@@ -284,44 +317,56 @@ xfs_scrub_experimental_warning(
"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
}
-/* Dispatch metadata scrubbing. */
-int
-xfs_scrub_metadata(
- struct xfs_inode *ip,
+static int
+xfs_scrub_validate_inputs(
+ struct xfs_mount *mp,
struct xfs_scrub_metadata *sm)
{
- struct xfs_scrub_context sc;
- struct xfs_mount *mp = ip->i_mount;
+ int error;
const struct xfs_scrub_meta_ops *ops;
- bool try_harder = false;
- int error = 0;
-
- trace_xfs_scrub_start(ip, sm, error);
-
- /* Forbidden if we are shut down or mounted norecovery. */
- error = -ESHUTDOWN;
- if (XFS_FORCED_SHUTDOWN(mp))
- goto out;
- error = -ENOTRECOVERABLE;
- if (mp->m_flags & XFS_MOUNT_NORECOVERY)
- goto out;
- /* Check our inputs. */
error = -EINVAL;
+ /* Check our inputs. */
sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
goto out;
+ /* sm_reserved[] must be zero */
if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
goto out;
- /* Do we know about this type of metadata? */
error = -ENOENT;
+ /* Do we know about this type of metadata? */
if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
goto out;
ops = &meta_scrub_ops[sm->sm_type];
- if (ops->scrub == NULL)
+ if (ops->setup == NULL || ops->scrub == NULL)
goto out;
+ /* Does this fs even support this type of metadata? */
+ if (ops->has && !ops->has(&mp->m_sb))
+ goto out;
+
+ error = -EINVAL;
+ /* restricting fields must be appropriate for type */
+ switch (ops->type) {
+ case ST_NONE:
+ case ST_FS:
+ if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
+ goto out;
+ break;
+ case ST_PERAG:
+ if (sm->sm_ino || sm->sm_gen ||
+ sm->sm_agno >= mp->m_sb.sb_agcount)
+ goto out;
+ break;
+ case ST_INODE:
+ if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
+ goto out;
+ break;
+ default:
+ goto out;
+ }
+ error = -EOPNOTSUPP;
/*
* We won't scrub any filesystem that doesn't have the ability
* to record unwritten extents. The option was made default in
@@ -331,20 +376,46 @@ xfs_scrub_metadata(
* We also don't support v1-v3 filesystems, which aren't
* mountable.
*/
- error = -EOPNOTSUPP;
if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
goto out;
- /* Does this fs even support this type of metadata? */
- error = -ENOENT;
- if (ops->has && !ops->has(&mp->m_sb))
- goto out;
-
/* We don't know how to repair anything yet. */
- error = -EOPNOTSUPP;
if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
goto out;
+ error = 0;
+out:
+ return error;
+}
+
+/* Dispatch metadata scrubbing. */
+int
+xfs_scrub_metadata(
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm)
+{
+ struct xfs_scrub_context sc;
+ struct xfs_mount *mp = ip->i_mount;
+ bool try_harder = false;
+ int error = 0;
+
+ BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
+ (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR));
+
+ trace_xfs_scrub_start(ip, sm, error);
+
+ /* Forbidden if we are shut down or mounted norecovery. */
+ error = -ESHUTDOWN;
+ if (XFS_FORCED_SHUTDOWN(mp))
+ goto out;
+ error = -ENOTRECOVERABLE;
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+ goto out;
+
+ error = xfs_scrub_validate_inputs(mp, sm);
+ if (error)
+ goto out;
+
xfs_scrub_experimental_warning(mp);
retry_op:
@@ -352,7 +423,7 @@ retry_op:
memset(&sc, 0, sizeof(sc));
sc.mp = ip->i_mount;
sc.sm = sm;
- sc.ops = ops;
+ sc.ops = &meta_scrub_ops[sm->sm_type];
sc.try_harder = try_harder;
sc.sa.agno = NULLAGNUMBER;
error = sc.ops->setup(&sc, ip);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e9ec041cf713..0d92af86f67a 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -22,6 +22,14 @@
struct xfs_scrub_context;
+/* Type info and names for the scrub types. */
+enum xfs_scrub_type {
+ ST_NONE = 1, /* disabled */
+ ST_PERAG, /* per-AG metadata */
+ ST_FS, /* per-FS metadata */
+ ST_INODE, /* per-inode metadata */
+};
+
struct xfs_scrub_meta_ops {
/* Acquire whatever resources are needed for the operation. */
int (*setup)(struct xfs_scrub_context *,
@@ -32,6 +40,9 @@ struct xfs_scrub_meta_ops {
/* Decide if we even have this piece of metadata. */
bool (*has)(struct xfs_sb *);
+
+ /* type describing required/allowed inputs */
+ enum xfs_scrub_type type;
};
/* Buffer pointers and btree cursors for an entire AG. */
@@ -112,4 +123,30 @@ xfs_scrub_quota(struct xfs_scrub_context *sc)
}
#endif
+/* cross-referencing helpers */
+void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len,
+ struct xfs_owner_info *oinfo);
+void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len,
+ struct xfs_owner_info *oinfo);
+void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc,
+ xfs_agblock_t bno, xfs_extlen_t len);
+void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc,
+ xfs_agblock_t bno, xfs_extlen_t len);
+#ifdef CONFIG_XFS_RT
+void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc,
+ xfs_rtblock_t rtbno, xfs_extlen_t len);
+#else
+# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
+#endif
+
#endif /* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index c4ebfb5c1ee8..4dc896852bf0 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -50,7 +50,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
__entry->flags = sm->sm_flags;
__entry->error = error;
),
- TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d",
+ TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->type,
@@ -90,7 +90,7 @@ TRACE_EVENT(xfs_scrub_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF",
+ TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->agno,
@@ -121,7 +121,7 @@ TRACE_EVENT(xfs_scrub_file_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
@@ -156,7 +156,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
__entry->bno = bno;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF",
+ TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->agno,
@@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
__entry->bno = bno;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->type,
@@ -246,7 +246,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class,
__entry->offset = offset;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
@@ -277,7 +277,7 @@ TRACE_EVENT(xfs_scrub_incomplete,
__entry->type = sc->sm->sm_type;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u ret_ip %pF",
+ TP_printk("dev %d:%d type %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->ret_ip)
@@ -311,7 +311,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
+ TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->btnum,
@@ -354,7 +354,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
@@ -393,7 +393,7 @@ TRACE_EVENT(xfs_scrub_btree_error,
__entry->ptr = cur->bc_ptrs[level];
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
+ TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->btnum,
@@ -433,7 +433,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error,
__entry->ptr = cur->bc_ptrs[level];
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
@@ -491,6 +491,28 @@ DEFINE_EVENT(xfs_scrub_sbtree_class, name, \
DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec);
DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key);
+TRACE_EVENT(xfs_scrub_xref_error, /* records an error code seen while cross-referencing */
+ TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip),
+ TP_ARGS(sc, error, ret_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, type)
+ __field(int, error)
+ __field(void *, ret_ip) /* code address; printed with %pS like the other scrub events */
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->type = sc->sm->sm_type;
+ __entry->error = error;
+ __entry->ret_ip = ret_ip;
+ ),
+ TP_printk("dev %d:%d type %u xref error %d ret_ip %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->type,
+ __entry->error,
+ __entry->ret_ip)
+);
+
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
#undef TRACE_INCLUDE_PATH