summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorNaohiro Aota <naohiro.aota@wdc.com>2021-04-08 17:25:28 +0900
committerDavid Sterba <dsterba@suse.com>2021-04-10 12:13:16 +0200
commit53b74fa990bf76f290aa5930abfcf37424a1a865 (patch)
tree71c5cb4776dd1543b0dc5c09a534d3f451c90714 /fs
parentc1d6abdac46ca8127274bea195d804e3f2cec7ee (diff)
btrfs: zoned: move superblock logging zone location
Moves the location of the superblock logging zones. The new locations of the logging zones are now determined based on fixed block addresses instead of on fixed zone numbers. The old placement method based on fixed zone numbers causes problems when one needs to inspect a file system image without access to the drive zone information. In such case, the super block locations cannot be reliably determined as the zone size is unknown. By locating the superblock logging zones using fixed addresses, we can scan a dumped file system image without the zone information since a super block copy will always be present at or after the fixed known locations. Introduce the following three pairs of zones containing fixed offset locations, regardless of the device zone size. - primary superblock: offset 0B (and the following zone) - first copy: offset 512G (and the following zone) - Second copy: offset 4T (4096G, and the following zone) If a logging zone is outside of the disk capacity, we do not record the superblock copy. The first copy position is much larger than for a non-zoned filesystem, which is at 64M. This is to avoid overlapping with the log zones for the primary superblock. This higher location is arbitrary but allows supporting devices with very large zone sizes, plus some space around in between. Such large zone size is unrealistic and very unlikely to ever be seen in real devices. Currently, SMR disks have a zone size of 256MB, and we are expecting ZNS drives to be in the 1-4GB range, so this limit gives us room to breathe. For now, we only allow zone sizes up to 8GB. The maximum zone size that would still fit in the space is 256G. The fixed location addresses are somewhat arbitrary, with the intent of maintaining superblock reliability for smaller and larger devices, with the preference for the latter. For this reason, there are two superblocks under the first 1T. This should cover use cases for physical devices and for emulated/device-mapper devices. The superblock logging zones are reserved for superblock logging and never used for data or metadata blocks. Note that we only reserve the two zones per primary/copy actually used for superblock logging. We do not reserve the ranges of zones possibly containing superblocks with the largest supported zone size (0-16GB, 512G-528GB, 4096G-4112G). The zones containing the fixed location offsets used to store superblocks on a non-zoned volume are also reserved to avoid confusion. Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/zoned.c53
1 files changed, 42 insertions, 11 deletions
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 43948bd40e02..ba7a303300a3 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -21,9 +21,30 @@
/* Pseudo write pointer value for conventional zone */
#define WP_CONVENTIONAL ((u64)-2)
+/*
+ * Location of the first zone of superblock logging zone pairs.
+ *
+ * - primary superblock: 0B (zone 0)
+ * - first copy: 512G (zone starting at that offset)
+ * - second copy: 4T (zone starting at that offset)
+ */
+#define BTRFS_SB_LOG_PRIMARY_OFFSET (0ULL)
+#define BTRFS_SB_LOG_FIRST_OFFSET (512ULL * SZ_1G)
+#define BTRFS_SB_LOG_SECOND_OFFSET (4096ULL * SZ_1G)
+
+#define BTRFS_SB_LOG_FIRST_SHIFT const_ilog2(BTRFS_SB_LOG_FIRST_OFFSET)
+#define BTRFS_SB_LOG_SECOND_SHIFT const_ilog2(BTRFS_SB_LOG_SECOND_OFFSET)
+
/* Number of superblock log zones */
#define BTRFS_NR_SB_LOG_ZONES 2
+/*
+ * Maximum supported zone size. Currently, SMR disks have a zone size of
+ * 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not
+ * expect the zone size to become larger than 8GiB in the near future.
+ */
+#define BTRFS_MAX_ZONE_SIZE SZ_8G
+
static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
{
struct blk_zone *zones = data;
@@ -111,23 +132,22 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
}
/*
- * The following zones are reserved as the circular buffer on ZONED btrfs.
- * - The primary superblock: zones 0 and 1
- * - The first copy: zones 16 and 17
- * - The second copy: zones 1024 or zone at 256GB which is minimum, and
- * the following one
+ * Get the first zone number of the superblock mirror
*/
static inline u32 sb_zone_number(int shift, int mirror)
{
- ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX);
+ u64 zone;
+ ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX);
switch (mirror) {
- case 0: return 0;
- case 1: return 16;
- case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024);
+ case 0: zone = 0; break;
+ case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break;
+ case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break;
}
- return 0;
+ ASSERT(zone <= U32_MAX);
+
+ return (u32)zone;
}
/*
@@ -300,10 +320,21 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
zone_sectors = bdev_zone_sectors(bdev);
}
- nr_sectors = bdev_nr_sectors(bdev);
/* Check if it's power of 2 (see is_power_of_2) */
ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
zone_info->zone_size = zone_sectors << SECTOR_SHIFT;
+
+ /* We reject devices with a zone size larger than 8GB */
+ if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) {
+ btrfs_err_in_rcu(fs_info,
+ "zoned: %s: zone size %llu larger than supported maximum %llu",
+ rcu_str_deref(device->name),
+ zone_info->zone_size, BTRFS_MAX_ZONE_SIZE);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ nr_sectors = bdev_nr_sectors(bdev);
zone_info->zone_size_shift = ilog2(zone_info->zone_size);
zone_info->max_zone_append_size =
(u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT;