summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_btree.h
diff options
context:
space:
mode:
author Darrick J. Wong <darrick.wong@oracle.com> 2016-08-03 11:08:36 +1000
committer Dave Chinner <david@fromorbit.com> 2016-08-03 11:08:36 +1000
commit 2c813ad66a7218a64db68f0a4bfa8d2d9caef4c0 (patch)
tree db611817eabb9b7553a4613932730cd6144b3f9d /fs/xfs/libxfs/xfs_btree.h
parent 70b2265935544c2ba64619172fd757bd0ca91800 (diff)
xfs: support btrees with overlapping intervals for keys
On a filesystem with both reflink and reverse mapping enabled, it's possible to have multiple rmap records referring to the same blocks on disk. When overlapping intervals are possible, querying a classic btree to find all records intersecting a given interval is inefficient because we cannot use the left side of the search interval to filter out non-matching records the same way that we can use the existing btree key to filter out records coming after the right side of the search interval. This will become important once we want to use the rmap btree to rebuild BMBTs, or implement the (future) fsmap ioctl. (For the non-overlapping case, we can perform such queries trivially by starting at the left side of the interval and walking the tree until we pass the right side.) Therefore, extend the btree code to come closer to supporting intervals as a first-class record attribute. This involves widening the btree node's key space to store both the lowest key reachable via the node pointer (as the btree does now) and the highest key reachable via the same pointer and teaching the btree modifying functions to keep the highest-key records up to date. This behavior can be turned on via a new btree ops flag so that btrees that cannot store overlapping intervals don't pay the overhead costs in terms of extra code and disk format changes. When we're deleting a record in a btree that supports overlapped interval records and the deletion results in two btree blocks being joined, we defer updating the high/low keys until after all possible joining (at higher levels in the tree) have finished. At this point, the btree pointers at all levels have been updated to remove the empty blocks and we can update the low and high keys. When we're doing this, we must be careful to update the keys of all node pointers up to the root instead of stopping at the first set of keys that don't need updating. 
This is because it's possible for a single deletion to cause joins at multiple levels of the tree, and so we need to update everything going back to the root. The diff_two_keys functions return < 0, 0, or > 0 if key1 is less than, equal to, or greater than key2, respectively. This is consistent with the rest of the kernel and the C library. In btree_updkeys(), we need to evaluate the force_all parameter before running the key diff to avoid reading uninitialized memory when we're forcing a key update. This happens when we've allocated an empty slot at level N + 1 to point to a new block at level N and we're in the process of filling out the new keys. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/libxfs/xfs_btree.h')
-rw-r--r-- fs/xfs/libxfs/xfs_btree.h 30
1 files changed, 30 insertions, 0 deletions
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index e097e60400d8..bce6daacb1f7 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -44,6 +44,20 @@ union xfs_btree_key {
xfs_inobt_key_t inobt;
};
+/*
+ * In-core key that holds both the low and high keys for overlapped
+ * btrees. The two keys are packed next to each other on disk, so do
+ * the same in memory. xfs_btree_key remains a single key so that a
+ * bigkey cannot be passed by mistake to a single-key function.
+ * NOTE(review): members below use single-key type names - confirm size.
+ */
+union xfs_btree_bigkey {
+ struct xfs_bmbt_key bmbt;
+ xfs_bmdr_key_t bmbr; /* bmbt root block */
+ xfs_alloc_key_t alloc;
+ struct xfs_inobt_key inobt;
+};
+
union xfs_btree_rec {
xfs_bmbt_rec_t bmbt;
xfs_bmdr_rec_t bmbr; /* bmbt root block */
@@ -162,11 +176,21 @@ struct xfs_btree_ops {
union xfs_btree_rec *rec);
void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr);
+ void (*init_high_key_from_rec)(union xfs_btree_key *key,
+ union xfs_btree_rec *rec);
/* difference between key value and cursor value */
__int64_t (*key_diff)(struct xfs_btree_cur *cur,
union xfs_btree_key *key);
+ /*
+ * Difference between key1 and key2 -- positive if key1 > key2,
+ * negative if key1 < key2, and zero if equal.
+ */
+ __int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
+ union xfs_btree_key *key1,
+ union xfs_btree_key *key2);
+
const struct xfs_buf_ops *buf_ops;
#if defined(DEBUG) || defined(XFS_WARN)
@@ -249,6 +273,7 @@ typedef struct xfs_btree_cur
#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */
+#define XFS_BTREE_OVERLAPPING (1<<4) /* overlapping intervals */
#define XFS_BTREE_NOERROR 0
@@ -493,5 +518,10 @@ void xfs_btree_get_leaf_keys(struct xfs_btree_cur *cur,
void xfs_btree_get_node_keys(struct xfs_btree_cur *cur,
struct xfs_btree_block *block, union xfs_btree_key *key);
int xfs_btree_update_keys(struct xfs_btree_cur *cur, int level);
+void xfs_btree_get_leaf_keys_overlapped(struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block, union xfs_btree_key *key);
+void xfs_btree_get_node_keys_overlapped(struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block, union xfs_btree_key *key);
+int xfs_btree_update_keys_overlapped(struct xfs_btree_cur *cur, int level);
#endif /* __XFS_BTREE_H__ */