author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-05 15:45:03 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-05 15:45:03 -0700
commit | b25c6644bfd3affd7d0127ce95c5c96c155a7515 (patch)
tree | 9ef9c0fe74a08b7baf3a3c3753368b0f481b581f /drivers/md/dm-zoned-metadata.c
parent | 818dbde78e0f4f11c9f804c36913a7ccfc2e87ad (diff)
parent | 64611a15ca9da91ff532982429c44686f4593b5f (diff)
Merge tag 'for-5.8/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- The largest change for this cycle is the DM zoned target's metadata
version 2 feature that adds support for pairing regular block devices
with a zoned device to ease the performance impact associated with
the finite random zones of a zoned device (the per-device zone
addressing this requires is sketched after this list).
The changes came in three batches: the first prepared for and then
added the ability to pair a single regular block device, the second
was a batch of fixes to improve the zoned target's reclaim heuristic,
and the third removed the limitation of only adding a single
additional regular block device, allowing many devices.
Testing has shown linear scaling as more devices are added.
- Add a new emulated block size (ebs) target that emulates a smaller
logical_block_size than the underlying block device supports.
The primary use-case is to emulate "512e" devices that have a 512 byte
logical_block_size and a 4KB physical_block_size. This is useful for
legacy applications that otherwise couldn't be used on 4K devices
because they depend on issuing IO at 512 byte granularity (the
block/offset arithmetic involved is sketched after this list).
- Add discard interfaces to DM bufio. First consumer of the interface
is the dm-ebs target that makes heavy use of dm-bufio.
- Fix DM crypt's block queue_limits stacking to not truncate
logical_block_size.
- Add Documentation for DM integrity's status line.
- Switch DMDEBUG from a compile-time config option to dynamic debug
via pr_debug (the macro pattern is sketched after this list).
- Fix DM multipath target's heuristic for how it manages
"queue_if_no_path" state internally.
DM multipath now avoids disabling "queue_if_no_path" unless it is
actually needed (e.g. in response to a configured timeout or an
explicit "fail_if_no_path" message).
This fixes spurious -EIO being reported back to userspace
applications during fault tolerance testing with an NVMe backend.
Added various dynamic DMDEBUG messages to assist with debugging
queue_if_no_path in the future.
- Add a new DM multipath "Historical Service Time" Path Selector (the
selection idea is sketched after this list).
- Fix DM multipath's dm_blk_ioctl() to switch paths on IO error.
- Improve DM writecache target performance by using explicit cache
flushing for the target's single-threaded use-case, plus a small
cleanup to remove an unnecessary test in persistent_memory_claim.
- Other small cleanups in DM core, dm-persistent-data, and DM
integrity.
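The sketches below illustrate a few of the items above. First, the multi-device pairing: zones from all paired devices share one global ID space, each device records the offset of its first zone, and per-device addressing is a subtraction plus a shift by the (uniform) zone size. This is a plain-C illustration with made-up struct names modeled on the dmz_dev_zone_id()/dmz_start_sect() helpers in the diff further down; it is not the kernel code itself.

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical, simplified mirrors of the dm-zoned structures. */
struct dev_info {
	unsigned int zone_offset;   /* first global zone ID owned by this device */
	unsigned int nr_zones;
};

struct zone_info {
	unsigned int id;            /* global zone ID across all paired devices */
	const struct dev_info *dev; /* device this zone lives on */
};

/* Zone size is uniform across devices: 2^19 sectors of 512 B = 256 MiB. */
#define ZONE_NR_SECTORS_SHIFT 19u

/* Per-device zone index: strip the device's starting offset. */
static unsigned int dev_zone_id(const struct zone_info *z)
{
	return z->id - z->dev->zone_offset;
}

/* Start sector of the zone relative to its own device. */
static uint64_t zone_start_sect(const struct zone_info *z)
{
	return (uint64_t)dev_zone_id(z) << ZONE_NR_SECTORS_SHIFT;
}

int main(void)
{
	struct dev_info regular = { .zone_offset = 0,   .nr_zones = 100 };
	struct dev_info zoned   = { .zone_offset = 100, .nr_zones = 1000 };
	struct zone_info z = { .id = 105, .dev = &zoned };

	(void)regular;
	printf("global zone %u -> device zone %u, start sector %llu\n",
	       z.id, dev_zone_id(&z),
	       (unsigned long long)zone_start_sect(&z));
	return 0;
}
```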
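Second, the ebs target's 512e emulation comes down to block arithmetic: an emulated 512-byte sector maps to a 4 KiB backing block plus a byte offset, and a sub-block write becomes a read-modify-write of that block. A minimal sketch with illustrative names, not the dm-ebs implementation:

```c
#include <stdint.h>
#include <stdio.h>

#define LOGICAL_BS  512u   /* emulated logical_block_size */
#define PHYSICAL_BS 4096u  /* underlying physical_block_size */

/* 512-byte sectors per 4 KiB block. */
#define SECTORS_PER_BLOCK (PHYSICAL_BS / LOGICAL_BS)

/* Map an emulated 512 B sector to the backing 4 KiB block and byte offset. */
static void map_sector(uint64_t lsector, uint64_t *block, uint32_t *offset)
{
	*block  = lsector / SECTORS_PER_BLOCK;
	*offset = (uint32_t)(lsector % SECTORS_PER_BLOCK) * LOGICAL_BS;
}

int main(void)
{
	uint64_t block;
	uint32_t offset;

	/* A 512 B write to sector 13 touches part of 4 KiB block 1 ... */
	map_sector(13, &block, &offset);
	printf("sector 13 -> block %llu, offset %u\n",
	       (unsigned long long)block, offset);
	/* ... so the target must read block 1, patch 512 bytes at that
	 * offset, and write the full 4 KiB block back. */
	return 0;
}
```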
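Third, the DMDEBUG switch: message macros now expand to pr_debug(), so dynamic debug can enable them at runtime (for example through /sys/kernel/debug/dynamic_debug/control) instead of a compile-time option. The same pattern is behind the dmz_zmd_debug() macro added in the diff below. A kernel-style sketch of that pattern; the macro names here are illustrative, not the exact device-mapper definitions:

```c
#include <linux/printk.h>

/* Debug messages go through pr_debug(), so with CONFIG_DYNAMIC_DEBUG they
 * can be enabled per file or per format string at runtime instead of
 * requiring a DM-specific compile-time debug option. */
#define EXAMPLE_DM_MSG_PREFIX "example"

#define example_dm_debug(fmt, args...) \
	pr_debug("device-mapper: " EXAMPLE_DM_MSG_PREFIX ": " fmt "\n", ## args)

/* Per-instance variant, as used by the dm-zoned macros in the diff:
 * prefix each message with the target's label. */
#define example_zmd_debug(zmd, fmt, args...) \
	pr_debug("dm-zoned (%s): " fmt "\n", (zmd)->label, ## args)
```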
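Fourth, the "Historical Service Time" selector favors paths that have recently completed IO quickly. The sketch below shows only the general idea (an exponentially weighted service-time estimate biased by in-flight IO); it is not the kernel selector's exact algorithm.

```c
#include <stdio.h>

struct path_stat {
	const char *name;
	double est_svc_time_us;  /* smoothed per-IO service time estimate */
	unsigned int in_flight;  /* IOs currently outstanding on this path */
};

/* Update the estimate with an observed completion latency (EWMA). */
static void record_completion(struct path_stat *p, double latency_us)
{
	const double alpha = 0.125; /* smoothing factor, illustrative value */

	p->est_svc_time_us += alpha * (latency_us - p->est_svc_time_us);
}

/* Pick the path expected to finish a new IO soonest. */
static struct path_stat *select_path(struct path_stat *paths, int n)
{
	struct path_stat *best = &paths[0];
	int i;

	for (i = 1; i < n; i++) {
		double cost      = paths[i].est_svc_time_us * (paths[i].in_flight + 1);
		double best_cost = best->est_svc_time_us * (best->in_flight + 1);

		if (cost < best_cost)
			best = &paths[i];
	}
	return best;
}

int main(void)
{
	struct path_stat paths[2] = {
		{ "sda", 150.0, 4 },
		{ "sdb", 400.0, 0 },
	};

	record_completion(&paths[0], 180.0);
	printf("selected: %s\n", select_path(paths, 2)->name);
	return 0;
}
```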
* tag 'for-5.8/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (62 commits)
dm crypt: avoid truncating the logical block size
dm mpath: add DM device name to Failing/Reinstating path log messages
dm mpath: enhance queue_if_no_path debugging
dm mpath: restrict queue_if_no_path state machine
dm mpath: simplify __must_push_back
dm zoned: check superblock location
dm zoned: prefer full zones for reclaim
dm zoned: select reclaim zone based on device index
dm zoned: allocate zone by device index
dm zoned: support arbitrary number of devices
dm zoned: move random and sequential zones into struct dmz_dev
dm zoned: per-device reclaim
dm zoned: add metadata pointer to struct dmz_dev
dm zoned: add device pointer to struct dm_zone
dm zoned: allocate temporary superblock for tertiary devices
dm zoned: convert to xarray
dm zoned: add a 'reserved' zone flag
dm zoned: improve logging messages for reclaim
dm zoned: avoid unnecessary device recalulation for secondary superblock
dm zoned: add debugging message for reading superblocks
...
Diffstat (limited to 'drivers/md/dm-zoned-metadata.c')
-rw-r--r-- | drivers/md/dm-zoned-metadata.c | 1046
1 file changed, 771 insertions, 275 deletions
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index bf2245370305..130b5a6d9f12 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -16,7 +16,7 @@ /* * Metadata version. */ -#define DMZ_META_VER 1 +#define DMZ_META_VER 2 /* * On-disk super block magic. @@ -69,8 +69,17 @@ struct dmz_super { /* Checksum */ __le32 crc; /* 48 */ + /* DM-Zoned label */ + u8 dmz_label[32]; /* 80 */ + + /* DM-Zoned UUID */ + u8 dmz_uuid[16]; /* 96 */ + + /* Device UUID */ + u8 dev_uuid[16]; /* 112 */ + /* Padding to full 512B sector */ - u8 reserved[464]; /* 512 */ + u8 reserved[400]; /* 512 */ }; /* @@ -122,8 +131,10 @@ enum { */ struct dmz_sb { sector_t block; + struct dmz_dev *dev; struct dmz_mblock *mblk; struct dmz_super *sb; + struct dm_zone *zone; }; /* @@ -131,28 +142,41 @@ struct dmz_sb { */ struct dmz_metadata { struct dmz_dev *dev; + unsigned int nr_devs; + + char devname[BDEVNAME_SIZE]; + char label[BDEVNAME_SIZE]; + uuid_t uuid; sector_t zone_bitmap_size; unsigned int zone_nr_bitmap_blocks; unsigned int zone_bits_per_mblk; + sector_t zone_nr_blocks; + sector_t zone_nr_blocks_shift; + + sector_t zone_nr_sectors; + sector_t zone_nr_sectors_shift; + unsigned int nr_bitmap_blocks; unsigned int nr_map_blocks; + unsigned int nr_zones; unsigned int nr_useable_zones; unsigned int nr_meta_blocks; unsigned int nr_meta_zones; unsigned int nr_data_zones; + unsigned int nr_cache_zones; unsigned int nr_rnd_zones; unsigned int nr_reserved_seq; unsigned int nr_chunks; /* Zone information array */ - struct dm_zone *zones; + struct xarray zones; - struct dm_zone *sb_zone; struct dmz_sb sb[2]; unsigned int mblk_primary; + unsigned int sb_version; u64 sb_gen; unsigned int min_nr_mblks; unsigned int max_nr_mblks; @@ -168,15 +192,11 @@ struct dmz_metadata { /* Zone allocation management */ struct mutex map_lock; struct dmz_mblock **map_mblk; - unsigned int nr_rnd; - atomic_t unmap_nr_rnd; - struct list_head unmap_rnd_list; - struct list_head map_rnd_list; - unsigned int nr_seq; - atomic_t unmap_nr_seq; - struct list_head unmap_seq_list; - struct list_head map_seq_list; + unsigned int nr_cache; + atomic_t unmap_nr_cache; + struct list_head unmap_cache_list; + struct list_head map_cache_list; atomic_t nr_reserved_seq_zones; struct list_head reserved_seq_zones_list; @@ -184,22 +204,65 @@ struct dmz_metadata { wait_queue_head_t free_wq; }; +#define dmz_zmd_info(zmd, format, args...) \ + DMINFO("(%s): " format, (zmd)->label, ## args) + +#define dmz_zmd_err(zmd, format, args...) \ + DMERR("(%s): " format, (zmd)->label, ## args) + +#define dmz_zmd_warn(zmd, format, args...) \ + DMWARN("(%s): " format, (zmd)->label, ## args) + +#define dmz_zmd_debug(zmd, format, args...) 
\ + DMDEBUG("(%s): " format, (zmd)->label, ## args) /* * Various accessors */ -unsigned int dmz_id(struct dmz_metadata *zmd, struct dm_zone *zone) +static unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone) { - return ((unsigned int)(zone - zmd->zones)); + if (WARN_ON(!zone)) + return 0; + + return zone->id - zone->dev->zone_offset; } sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)dmz_id(zmd, zone) << zmd->dev->zone_nr_sectors_shift; + unsigned int zone_id = dmz_dev_zone_id(zmd, zone); + + return (sector_t)zone_id << zmd->zone_nr_sectors_shift; } sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)dmz_id(zmd, zone) << zmd->dev->zone_nr_blocks_shift; + unsigned int zone_id = dmz_dev_zone_id(zmd, zone); + + return (sector_t)zone_id << zmd->zone_nr_blocks_shift; +} + +unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd) +{ + return zmd->zone_nr_blocks; +} + +unsigned int dmz_zone_nr_blocks_shift(struct dmz_metadata *zmd) +{ + return zmd->zone_nr_blocks_shift; +} + +unsigned int dmz_zone_nr_sectors(struct dmz_metadata *zmd) +{ + return zmd->zone_nr_sectors; +} + +unsigned int dmz_zone_nr_sectors_shift(struct dmz_metadata *zmd) +{ + return zmd->zone_nr_sectors_shift; +} + +unsigned int dmz_nr_zones(struct dmz_metadata *zmd) +{ + return zmd->nr_zones; } unsigned int dmz_nr_chunks(struct dmz_metadata *zmd) @@ -207,14 +270,88 @@ unsigned int dmz_nr_chunks(struct dmz_metadata *zmd) return zmd->nr_chunks; } -unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd) +unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd, int idx) { - return zmd->nr_rnd; + return zmd->dev[idx].nr_rnd; } -unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd) +unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd, int idx) { - return atomic_read(&zmd->unmap_nr_rnd); + return atomic_read(&zmd->dev[idx].unmap_nr_rnd); +} + +unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd) +{ + return zmd->nr_cache; +} + +unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd) +{ + return atomic_read(&zmd->unmap_nr_cache); +} + +unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd, int idx) +{ + return zmd->dev[idx].nr_seq; +} + +unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd, int idx) +{ + return atomic_read(&zmd->dev[idx].unmap_nr_seq); +} + +static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id) +{ + return xa_load(&zmd->zones, zone_id); +} + +static struct dm_zone *dmz_insert(struct dmz_metadata *zmd, + unsigned int zone_id, struct dmz_dev *dev) +{ + struct dm_zone *zone = kzalloc(sizeof(struct dm_zone), GFP_KERNEL); + + if (!zone) + return ERR_PTR(-ENOMEM); + + if (xa_insert(&zmd->zones, zone_id, zone, GFP_KERNEL)) { + kfree(zone); + return ERR_PTR(-EBUSY); + } + + INIT_LIST_HEAD(&zone->link); + atomic_set(&zone->refcount, 0); + zone->id = zone_id; + zone->chunk = DMZ_MAP_UNMAPPED; + zone->dev = dev; + + return zone; +} + +const char *dmz_metadata_label(struct dmz_metadata *zmd) +{ + return (const char *)zmd->label; +} + +bool dmz_check_dev(struct dmz_metadata *zmd) +{ + unsigned int i; + + for (i = 0; i < zmd->nr_devs; i++) { + if (!dmz_check_bdev(&zmd->dev[i])) + return false; + } + return true; +} + +bool dmz_dev_is_dying(struct dmz_metadata *zmd) +{ + unsigned int i; + + for (i = 0; i < zmd->nr_devs; i++) { + if (dmz_bdev_is_dying(&zmd->dev[i])) + return true; + } + return false; } /* @@ -402,9 +539,10 @@ static struct dmz_mblock 
*dmz_get_mblock_slow(struct dmz_metadata *zmd, { struct dmz_mblock *mblk, *m; sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; + struct dmz_dev *dev = zmd->sb[zmd->mblk_primary].dev; struct bio *bio; - if (dmz_bdev_is_dying(zmd->dev)) + if (dmz_bdev_is_dying(dev)) return ERR_PTR(-EIO); /* Get a new block and a BIO to read it */ @@ -440,7 +578,7 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, /* Submit read BIO */ bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, zmd->dev->bdev); + bio_set_dev(bio, dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO); @@ -537,6 +675,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, sector_t mblk_no) { struct dmz_mblock *mblk; + struct dmz_dev *dev = zmd->sb[zmd->mblk_primary].dev; /* Check rbtree */ spin_lock(&zmd->mblk_lock); @@ -555,7 +694,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { dmz_release_mblock(zmd, mblk); - dmz_check_bdev(zmd->dev); + dmz_check_bdev(dev); return ERR_PTR(-EIO); } @@ -579,10 +718,11 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk) static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, unsigned int set) { + struct dmz_dev *dev = zmd->sb[set].dev; sector_t block = zmd->sb[set].block + mblk->no; struct bio *bio; - if (dmz_bdev_is_dying(zmd->dev)) + if (dmz_bdev_is_dying(dev)) return -EIO; bio = bio_alloc(GFP_NOIO, 1); @@ -594,7 +734,7 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, set_bit(DMZ_META_WRITING, &mblk->state); bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, zmd->dev->bdev); + bio_set_dev(bio, dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); @@ -607,13 +747,16 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, /* * Read/write a metadata block. 
*/ -static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, - struct page *page) +static int dmz_rdwr_block(struct dmz_dev *dev, int op, + sector_t block, struct page *page) { struct bio *bio; int ret; - if (dmz_bdev_is_dying(zmd->dev)) + if (WARN_ON(!dev)) + return -EIO; + + if (dmz_bdev_is_dying(dev)) return -EIO; bio = bio_alloc(GFP_NOIO, 1); @@ -621,14 +764,14 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, return -ENOMEM; bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, zmd->dev->bdev); + bio_set_dev(bio, dev->bdev); bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO); bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0); ret = submit_bio_wait(bio); bio_put(bio); if (ret) - dmz_check_bdev(zmd->dev); + dmz_check_bdev(dev); return ret; } @@ -637,18 +780,32 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, */ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) { - sector_t block = zmd->sb[set].block; struct dmz_mblock *mblk = zmd->sb[set].mblk; struct dmz_super *sb = zmd->sb[set].sb; + struct dmz_dev *dev = zmd->sb[set].dev; + sector_t sb_block; u64 sb_gen = zmd->sb_gen + 1; int ret; sb->magic = cpu_to_le32(DMZ_MAGIC); - sb->version = cpu_to_le32(DMZ_META_VER); + + sb->version = cpu_to_le32(zmd->sb_version); + if (zmd->sb_version > 1) { + BUILD_BUG_ON(UUID_SIZE != 16); + export_uuid(sb->dmz_uuid, &zmd->uuid); + memcpy(sb->dmz_label, zmd->label, BDEVNAME_SIZE); + export_uuid(sb->dev_uuid, &dev->uuid); + } sb->gen = cpu_to_le64(sb_gen); - sb->sb_block = cpu_to_le64(block); + /* + * The metadata always references the absolute block address, + * ie relative to the entire block range, not the per-device + * block address. + */ + sb_block = zmd->sb[set].zone->id << zmd->zone_nr_blocks_shift; + sb->sb_block = cpu_to_le64(sb_block); sb->nr_meta_blocks = cpu_to_le32(zmd->nr_meta_blocks); sb->nr_reserved_seq = cpu_to_le32(zmd->nr_reserved_seq); sb->nr_chunks = cpu_to_le32(zmd->nr_chunks); @@ -659,9 +816,10 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) sb->crc = 0; sb->crc = cpu_to_le32(crc32_le(sb_gen, (unsigned char *)sb, DMZ_BLOCK_SIZE)); - ret = dmz_rdwr_block(zmd, REQ_OP_WRITE, block, mblk->page); + ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block, + mblk->page); if (ret == 0) - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO); + ret = blkdev_issue_flush(dev->bdev, GFP_NOIO); return ret; } @@ -674,6 +832,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, unsigned int set) { struct dmz_mblock *mblk; + struct dmz_dev *dev = zmd->sb[set].dev; struct blk_plug plug; int ret = 0, nr_mblks_submitted = 0; @@ -695,7 +854,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { clear_bit(DMZ_META_ERROR, &mblk->state); - dmz_check_bdev(zmd->dev); + dmz_check_bdev(dev); ret = -EIO; } nr_mblks_submitted--; @@ -703,7 +862,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, /* Flush drive cache (this will also sync data) */ if (ret == 0) - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO); + ret = blkdev_issue_flush(dev->bdev, GFP_NOIO); return ret; } @@ -740,6 +899,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) { struct dmz_mblock *mblk; struct list_head write_list; + struct dmz_dev *dev; int ret; if (WARN_ON(!zmd)) @@ -753,6 +913,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) * from modifying metadata. 
*/ down_write(&zmd->mblk_sem); + dev = zmd->sb[zmd->mblk_primary].dev; /* * This is called from the target flush work and reclaim work. @@ -760,7 +921,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ dmz_lock_flush(zmd); - if (dmz_bdev_is_dying(zmd->dev)) { + if (dmz_bdev_is_dying(dev)) { ret = -EIO; goto out; } @@ -772,7 +933,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) /* If there are no dirty metadata blocks, just flush the device cache */ if (list_empty(&write_list)) { - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO); + ret = blkdev_issue_flush(dev->bdev, GFP_NOIO); goto err; } @@ -821,7 +982,7 @@ err: list_splice(&write_list, &zmd->mblk_dirty_list); spin_unlock(&zmd->mblk_lock); } - if (!dmz_check_bdev(zmd->dev)) + if (!dmz_check_bdev(dev)) ret = -EIO; goto out; } @@ -829,12 +990,31 @@ err: /* * Check super block. */ -static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_super *sb) +static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb, + bool tertiary) { + struct dmz_super *sb = dsb->sb; + struct dmz_dev *dev = dsb->dev; unsigned int nr_meta_zones, nr_data_zones; - struct dmz_dev *dev = zmd->dev; u32 crc, stored_crc; - u64 gen; + u64 gen, sb_block; + + if (le32_to_cpu(sb->magic) != DMZ_MAGIC) { + dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)", + DMZ_MAGIC, le32_to_cpu(sb->magic)); + return -ENXIO; + } + + zmd->sb_version = le32_to_cpu(sb->version); + if (zmd->sb_version > DMZ_META_VER) { + dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)", + DMZ_META_VER, zmd->sb_version); + return -EINVAL; + } + if (zmd->sb_version < 2 && tertiary) { + dmz_dev_err(dev, "Tertiary superblocks are not supported"); + return -EINVAL; + } gen = le64_to_cpu(sb->gen); stored_crc = le32_to_cpu(sb->crc); @@ -846,20 +1026,57 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_super *sb) return -ENXIO; } - if (le32_to_cpu(sb->magic) != DMZ_MAGIC) { - dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)", - DMZ_MAGIC, le32_to_cpu(sb->magic)); - return -ENXIO; - } + sb_block = le64_to_cpu(sb->sb_block); + if (sb_block != (u64)dsb->zone->id << zmd->zone_nr_blocks_shift ) { + dmz_dev_err(dev, "Invalid superblock position " + "(is %llu expected %llu)", + sb_block, + (u64)dsb->zone->id << zmd->zone_nr_blocks_shift); + return -EINVAL; + } + if (zmd->sb_version > 1) { + uuid_t sb_uuid; + + import_uuid(&sb_uuid, sb->dmz_uuid); + if (uuid_is_null(&sb_uuid)) { + dmz_dev_err(dev, "NULL DM-Zoned uuid"); + return -ENXIO; + } else if (uuid_is_null(&zmd->uuid)) { + uuid_copy(&zmd->uuid, &sb_uuid); + } else if (!uuid_equal(&zmd->uuid, &sb_uuid)) { + dmz_dev_err(dev, "mismatching DM-Zoned uuid, " + "is %pUl expected %pUl", + &sb_uuid, &zmd->uuid); + return -ENXIO; + } + if (!strlen(zmd->label)) + memcpy(zmd->label, sb->dmz_label, BDEVNAME_SIZE); + else if (memcmp(zmd->label, sb->dmz_label, BDEVNAME_SIZE)) { + dmz_dev_err(dev, "mismatching DM-Zoned label, " + "is %s expected %s", + sb->dmz_label, zmd->label); + return -ENXIO; + } + import_uuid(&dev->uuid, sb->dev_uuid); + if (uuid_is_null(&dev->uuid)) { + dmz_dev_err(dev, "NULL device uuid"); + return -ENXIO; + } - if (le32_to_cpu(sb->version) != DMZ_META_VER) { - dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)", - DMZ_META_VER, le32_to_cpu(sb->version)); - return -ENXIO; + if (tertiary) { + /* + * Generation number should be 0, but it doesn't + * really matter if it isn't. 
+ */ + if (gen != 0) + dmz_dev_warn(dev, "Invalid generation %llu", + gen); + return 0; + } } - nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + dev->zone_nr_blocks - 1) - >> dev->zone_nr_blocks_shift; + nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1) + >> zmd->zone_nr_blocks_shift; if (!nr_meta_zones || nr_meta_zones >= zmd->nr_rnd_zones) { dmz_dev_err(dev, "Invalid number of metadata blocks"); @@ -895,10 +1112,13 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_super *sb) /* * Read the first or second super block from disk. */ -static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set) +static int dmz_read_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set) { - return dmz_rdwr_block(zmd, REQ_OP_READ, zmd->sb[set].block, - zmd->sb[set].mblk->page); + dmz_zmd_debug(zmd, "read superblock set %d dev %s block %llu", + set, sb->dev->name, sb->block); + + return dmz_rdwr_block(sb->dev, REQ_OP_READ, + sb->block, sb->mblk->page); } /* @@ -908,8 +1128,9 @@ static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set) */ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) { - unsigned int zone_nr_blocks = zmd->dev->zone_nr_blocks; + unsigned int zone_nr_blocks = zmd->zone_nr_blocks; struct dmz_mblock *mblk; + unsigned int zone_id = zmd->sb[0].zone->id; int i; /* Allocate a block */ @@ -922,24 +1143,29 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) /* Bad first super block: search for the second one */ zmd->sb[1].block = zmd->sb[0].block + zone_nr_blocks; - for (i = 0; i < zmd->nr_rnd_zones - 1; i++) { - if (dmz_read_sb(zmd, 1) != 0) + zmd->sb[1].zone = dmz_get(zmd, zone_id + 1); + zmd->sb[1].dev = zmd->sb[0].dev; + for (i = 1; i < zmd->nr_rnd_zones; i++) { + if (dmz_read_sb(zmd, &zmd->sb[1], 1) != 0) break; if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC) return 0; zmd->sb[1].block += zone_nr_blocks; + zmd->sb[1].zone = dmz_get(zmd, zone_id + i); } dmz_free_mblock(zmd, mblk); zmd->sb[1].mblk = NULL; + zmd->sb[1].zone = NULL; + zmd->sb[1].dev = NULL; return -EIO; } /* - * Read the first or second super block from disk. + * Read a super block from disk. 
*/ -static int dmz_get_sb(struct dmz_metadata *zmd, unsigned int set) +static int dmz_get_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set) { struct dmz_mblock *mblk; int ret; @@ -949,14 +1175,14 @@ static int dmz_get_sb(struct dmz_metadata *zmd, unsigned int set) if (!mblk) return -ENOMEM; - zmd->sb[set].mblk = mblk; - zmd->sb[set].sb = mblk->data; + sb->mblk = mblk; + sb->sb = mblk->data; /* Read super block */ - ret = dmz_read_sb(zmd, set); + ret = dmz_read_sb(zmd, sb, set); if (ret) { dmz_free_mblock(zmd, mblk); - zmd->sb[set].mblk = NULL; + sb->mblk = NULL; return ret; } @@ -972,14 +1198,13 @@ static int dmz_recover_mblocks(struct dmz_metadata *zmd, unsigned int dst_set) struct page *page; int i, ret; - dmz_dev_warn(zmd->dev, "Metadata set %u invalid: recovering", dst_set); + dmz_dev_warn(zmd->sb[dst_set].dev, + "Metadata set %u invalid: recovering", dst_set); if (dst_set == 0) - zmd->sb[0].block = dmz_start_block(zmd, zmd->sb_zone); - else { - zmd->sb[1].block = zmd->sb[0].block + - (zmd->nr_meta_zones << zmd->dev->zone_nr_blocks_shift); - } + zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone); + else + zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone); page = alloc_page(GFP_NOIO); if (!page) @@ -987,11 +1212,11 @@ static int dmz_recover_mblocks(struct dmz_metadata *zmd, unsigned int dst_set) /* Copy metadata blocks */ for (i = 1; i < zmd->nr_meta_blocks; i++) { - ret = dmz_rdwr_block(zmd, REQ_OP_READ, + ret = dmz_rdwr_block(zmd->sb[src_set].dev, REQ_OP_READ, zmd->sb[src_set].block + i, page); if (ret) goto out; - ret = dmz_rdwr_block(zmd, REQ_OP_WRITE, + ret = dmz_rdwr_block(zmd->sb[dst_set].dev, REQ_OP_WRITE, zmd->sb[dst_set].block + i, page); if (ret) goto out; @@ -1023,53 +1248,73 @@ static int dmz_load_sb(struct dmz_metadata *zmd) u64 sb_gen[2] = {0, 0}; int ret; + if (!zmd->sb[0].zone) { + dmz_zmd_err(zmd, "Primary super block zone not set"); + return -ENXIO; + } + /* Read and check the primary super block */ - zmd->sb[0].block = dmz_start_block(zmd, zmd->sb_zone); - ret = dmz_get_sb(zmd, 0); + zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone); + zmd->sb[0].dev = zmd->sb[0].zone->dev; + ret = dmz_get_sb(zmd, &zmd->sb[0], 0); if (ret) { - dmz_dev_err(zmd->dev, "Read primary super block failed"); + dmz_dev_err(zmd->sb[0].dev, "Read primary super block failed"); return ret; } - ret = dmz_check_sb(zmd, zmd->sb[0].sb); + ret = dmz_check_sb(zmd, &zmd->sb[0], false); /* Read and check secondary super block */ if (ret == 0) { sb_good[0] = true; - zmd->sb[1].block = zmd->sb[0].block + - (zmd->nr_meta_zones << zmd->dev->zone_nr_blocks_shift); - ret = dmz_get_sb(zmd, 1); + if (!zmd->sb[1].zone) { + unsigned int zone_id = + zmd->sb[0].zone->id + zmd->nr_meta_zones; + + zmd->sb[1].zone = dmz_get(zmd, zone_id); + } + zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone); + zmd->sb[1].dev = zmd->sb[0].dev; + ret = dmz_get_sb(zmd, &zmd->sb[1], 1); } else ret = dmz_lookup_secondary_sb(zmd); if (ret) { - dmz_dev_err(zmd->dev, "Read secondary super block failed"); + dmz_dev_err(zmd->sb[1].dev, "Read secondary super block failed"); return ret; } - ret = dmz_check_sb(zmd, zmd->sb[1].sb); + ret = dmz_check_sb(zmd, &zmd->sb[1], false); if (ret == 0) sb_good[1] = true; /* Use highest generation sb first */ if (!sb_good[0] && !sb_good[1]) { - dmz_dev_err(zmd->dev, "No valid super block found"); + dmz_zmd_err(zmd, "No valid super block found"); return -EIO; } if (sb_good[0]) sb_gen[0] = le64_to_cpu(zmd->sb[0].sb->gen); - else + else { ret = dmz_recover_mblocks(zmd, 0); + if 
(ret) { + dmz_dev_err(zmd->sb[0].dev, + "Recovery of superblock 0 failed"); + return -EIO; + } + } if (sb_good[1]) sb_gen[1] = le64_to_cpu(zmd->sb[1].sb->gen); - else + else { ret = dmz_recover_mblocks(zmd, 1); - if (ret) { - dmz_dev_err(zmd->dev, "Recovery failed"); - return -EIO; + if (ret) { + dmz_dev_err(zmd->sb[1].dev, + "Recovery of superblock 1 failed"); + return -EIO; + } } if (sb_gen[0] >= sb_gen[1]) { @@ -1080,32 +1325,70 @@ static int dmz_load_sb(struct dmz_metadata *zmd) zmd->mblk_primary = 1; } - dmz_dev_debug(zmd->dev, "Using super block %u (gen %llu)", + dmz_dev_debug(zmd->sb[zmd->mblk_primary].dev, + "Using super block %u (gen %llu)", zmd->mblk_primary, zmd->sb_gen); - return 0; + if (zmd->sb_version > 1) { + int i; + struct dmz_sb *sb; + + sb = kzalloc(sizeof(struct dmz_sb), GFP_KERNEL); + if (!sb) + return -ENOMEM; + for (i = 1; i < zmd->nr_devs; i++) { + sb->block = 0; + sb->zone = dmz_get(zmd, zmd->dev[i].zone_offset); + sb->dev = &zmd->dev[i]; + if (!dmz_is_meta(sb->zone)) { + dmz_dev_err(sb->dev, + "Tertiary super block zone %u not marked as metadata zone", + sb->zone->id); + ret = -EINVAL; + goto out_kfree; + } + ret = dmz_get_sb(zmd, sb, i + 1); + if (ret) { + dmz_dev_err(sb->dev, + "Read tertiary super block failed"); + dmz_free_mblock(zmd, sb->mblk); + goto out_kfree; + } + ret = dmz_check_sb(zmd, sb, true); + dmz_free_mblock(zmd, sb->mblk); + if (ret == -EINVAL) + goto out_kfree; + } + out_kfree: + kfree(sb); + } + return ret; } /* * Initialize a zone descriptor. */ -static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) +static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) { - struct dmz_metadata *zmd = data; - struct dm_zone *zone = &zmd->zones[idx]; - struct dmz_dev *dev = zmd->dev; + struct dmz_dev *dev = data; + struct dmz_metadata *zmd = dev->metadata; + int idx = num + dev->zone_offset; + struct dm_zone *zone; + + zone = dmz_insert(zmd, idx, dev); + if (IS_ERR(zone)) + return PTR_ERR(zone); - /* Ignore the eventual last runt (smaller) zone */ - if (blkz->len != dev->zone_nr_sectors) { - if (blkz->start + blkz->len == dev->capacity) + if (blkz->len != zmd->zone_nr_sectors) { + if (zmd->sb_version > 1) { + /* Ignore the eventual runt (smaller) zone */ + set_bit(DMZ_OFFLINE, &zone->flags); + return 0; + } else if (blkz->start + blkz->len == dev->capacity) return 0; return -ENXIO; } - INIT_LIST_HEAD(&zone->link); - atomic_set(&zone->refcount, 0); - zone->chunk = DMZ_MAP_UNMAPPED; - switch (blkz->type) { case BLK_ZONE_TYPE_CONVENTIONAL: set_bit(DMZ_RND, &zone->flags); @@ -1131,13 +1414,45 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) zmd->nr_useable_zones++; if (dmz_is_rnd(zone)) { zmd->nr_rnd_zones++; - if (!zmd->sb_zone) { - /* Super block zone */ - zmd->sb_zone = zone; + if (zmd->nr_devs == 1 && !zmd->sb[0].zone) { + /* Primary super block zone */ + zmd->sb[0].zone = zone; } } + if (zmd->nr_devs > 1 && num == 0) { + /* + * Tertiary superblock zones are always at the + * start of the zoned devices, so mark them + * as metadata zone. 
+ */ + set_bit(DMZ_META, &zone->flags); + } } + return 0; +} + +static int dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) +{ + int idx; + sector_t zone_offset = 0; + for(idx = 0; idx < dev->nr_zones; idx++) { + struct dm_zone *zone; + + zone = dmz_insert(zmd, idx, dev); + if (IS_ERR(zone)) + return PTR_ERR(zone); + set_bit(DMZ_CACHE, &zone->flags); + zone->wp_block = 0; + zmd->nr_cache_zones++; + zmd->nr_useable_zones++; + if (dev->capacity - zone_offset < zmd->zone_nr_sectors) { + /* Disable runt zone */ + set_bit(DMZ_OFFLINE, &zone->flags); + break; + } + zone_offset += zmd->zone_nr_sectors; + } return 0; } @@ -1146,8 +1461,15 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) */ static void dmz_drop_zones(struct dmz_metadata *zmd) { - kfree(zmd->zones); - zmd->zones = NULL; + int idx; + + for(idx = 0; idx < zmd->nr_zones; idx++) { + struct dm_zone *zone = xa_load(&zmd->zones, idx); + + kfree(zone); + xa_erase(&zmd->zones, idx); + } + xa_destroy(&zmd->zones); } /* @@ -1156,32 +1478,87 @@ static void dmz_drop_zones(struct dmz_metadata *zmd) */ static int dmz_init_zones(struct dmz_metadata *zmd) { - struct dmz_dev *dev = zmd->dev; - int ret; + int i, ret; + struct dmz_dev *zoned_dev = &zmd->dev[0]; /* Init */ - zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3; + zmd->zone_nr_sectors = zmd->dev[0].zone_nr_sectors; + zmd->zone_nr_sectors_shift = ilog2(zmd->zone_nr_sectors); + zmd->zone_nr_blocks = dmz_sect2blk(zmd->zone_nr_sectors); + zmd->zone_nr_blocks_shift = ilog2(zmd->zone_nr_blocks); + zmd->zone_bitmap_size = zmd->zone_nr_blocks >> 3; zmd->zone_nr_bitmap_blocks = max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT); - zmd->zone_bits_per_mblk = min_t(sector_t, dev->zone_nr_blocks, + zmd->zone_bits_per_mblk = min_t(sector_t, zmd->zone_nr_blocks, DMZ_BLOCK_SIZE_BITS); /* Allocate zone array */ - zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); - if (!zmd->zones) - return -ENOMEM; + zmd->nr_zones = 0; + for (i = 0; i < zmd->nr_devs; i++) { + struct dmz_dev *dev = &zmd->dev[i]; + + dev->metadata = zmd; + zmd->nr_zones += dev->nr_zones; + + atomic_set(&dev->unmap_nr_rnd, 0); + INIT_LIST_HEAD(&dev->unmap_rnd_list); + INIT_LIST_HEAD(&dev->map_rnd_list); + + atomic_set(&dev->unmap_nr_seq, 0); + INIT_LIST_HEAD(&dev->unmap_seq_list); + INIT_LIST_HEAD(&dev->map_seq_list); + } + + if (!zmd->nr_zones) { + DMERR("(%s): No zones found", zmd->devname); + return -ENXIO; + } + xa_init(&zmd->zones); + + DMDEBUG("(%s): Using %zu B for zone information", + zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones); - dmz_dev_info(dev, "Using %zu B for zone information", - sizeof(struct dm_zone) * dev->nr_zones); + if (zmd->nr_devs > 1) { + ret = dmz_emulate_zones(zmd, &zmd->dev[0]); + if (ret < 0) { + DMDEBUG("(%s): Failed to emulate zones, error %d", + zmd->devname, ret); + dmz_drop_zones(zmd); + return ret; + } + + /* + * Primary superblock zone is always at zone 0 when multiple + * drives are present. + */ + zmd->sb[0].zone = dmz_get(zmd, 0); + + for (i = 1; i < zmd->nr_devs; i++) { + zoned_dev = &zmd->dev[i]; + + ret = blkdev_report_zones(zoned_dev->bdev, 0, + BLK_ALL_ZONES, + dmz_init_zone, zoned_dev); + if (ret < 0) { + DMDEBUG("(%s): Failed to report zones, error %d", + zmd->devname, ret); + dmz_drop_zones(zmd); + return ret; + } + } + return 0; + } /* * Get zone information and initialize zone descriptors. At the same * time, determine where the super block should be: first block of the * first randomly writable zone. 
*/ - ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES, dmz_init_zone, - zmd); + ret = blkdev_report_zones(zoned_dev->bdev, 0, BLK_ALL_ZONES, + dmz_init_zone, zoned_dev); if (ret < 0) { + DMDEBUG("(%s): Failed to report zones, error %d", + zmd->devname, ret); dmz_drop_zones(zmd); return ret; } @@ -1213,9 +1590,13 @@ static int dmz_update_zone_cb(struct blk_zone *blkz, unsigned int idx, */ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) { + struct dmz_dev *dev = zone->dev; unsigned int noio_flag; int ret; + if (dev->flags & DMZ_BDEV_REGULAR) + return 0; + /* * Get zone information from disk. Since blkdev_report_zones() uses * GFP_KERNEL by default for memory allocations, set the per-task @@ -1223,16 +1604,16 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) * GFP_NOIO was specified. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone), 1, + ret = blkdev_report_zones(dev->bdev, dmz_start_sect(zmd, zone), 1, dmz_update_zone_cb, zone); memalloc_noio_restore(noio_flag); if (ret == 0) ret = -EIO; if (ret < 0) { - dmz_dev_err(zmd->dev, "Get zone %u report failed", - dmz_id(zmd, zone)); - dmz_check_bdev(zmd->dev); + dmz_dev_err(dev, "Get zone %u report failed", + zone->id); + dmz_check_bdev(dev); return ret; } @@ -1246,6 +1627,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) static int dmz_handle_seq_write_err(struct dmz_metadata *zmd, struct dm_zone *zone) { + struct dmz_dev *dev = zone->dev; unsigned int wp = 0; int ret; @@ -1254,8 +1636,8 @@ static int dmz_handle_seq_write_err(struct dmz_metadata *zmd, if (ret) return ret; - dmz_dev_warn(zmd->dev, "Processing zone %u write error (zone wp %u/%u)", - dmz_id(zmd, zone), zone->wp_block, wp); + dmz_dev_warn(dev, "Processing zone %u write error (zone wp %u/%u)", + zone->id, zone->wp_block, wp); if (zone->wp_block < wp) { dmz_invalidate_blocks(zmd, zone, zone->wp_block, @@ -1265,11 +1647,6 @@ static int dmz_handle_seq_write_err(struct dmz_metadata *zmd, return 0; } -static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id) -{ - return &zmd->zones[zone_id]; -} - /* * Reset a zone write pointer. 
*/ @@ -1287,14 +1664,14 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) return 0; if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) { - struct dmz_dev *dev = zmd->dev; + struct dmz_dev *dev = zone->dev; ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET, dmz_start_sect(zmd, zone), - dev->zone_nr_sectors, GFP_NOIO); + zmd->zone_nr_sectors, GFP_NOIO); if (ret) { dmz_dev_err(dev, "Reset zone %u failed %d", - dmz_id(zmd, zone), ret); + zone->id, ret); return ret; } } @@ -1313,7 +1690,6 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone); */ static int dmz_load_mapping(struct dmz_metadata *zmd) { - struct dmz_dev *dev = zmd->dev; struct dm_zone *dzone, *bzone; struct dmz_mblock *dmap_mblk = NULL; struct dmz_map *dmap; @@ -1345,36 +1721,48 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) if (dzone_id == DMZ_MAP_UNMAPPED) goto next; - if (dzone_id >= dev->nr_zones) { - dmz_dev_err(dev, "Chunk %u mapping: invalid data zone ID %u", + if (dzone_id >= zmd->nr_zones) { + dmz_zmd_err(zmd, "Chunk %u mapping: invalid data zone ID %u", chunk, dzone_id); return -EIO; } dzone = dmz_get(zmd, dzone_id); + if (!dzone) { + dmz_zmd_err(zmd, "Chunk %u mapping: data zone %u not present", + chunk, dzone_id); + return -EIO; + } set_bit(DMZ_DATA, &dzone->flags); dzone->chunk = chunk; dmz_get_zone_weight(zmd, dzone); - if (dmz_is_rnd(dzone)) - list_add_tail(&dzone->link, &zmd->map_rnd_list); + if (dmz_is_cache(dzone)) + list_add_tail(&dzone->link, &zmd->map_cache_list); + else if (dmz_is_rnd(dzone)) + list_add_tail(&dzone->link, &dzone->dev->map_rnd_list); else - list_add_tail(&dzone->link, &zmd->map_seq_list); + list_add_tail(&dzone->link, &dzone->dev->map_seq_list); /* Check buffer zone */ bzone_id = le32_to_cpu(dmap[e].bzone_id); if (bzone_id == DMZ_MAP_UNMAPPED) goto next; - if (bzone_id >= dev->nr_zones) { - dmz_dev_err(dev, "Chunk %u mapping: invalid buffer zone ID %u", + if (bzone_id >= zmd->nr_zones) { + dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone ID %u", chunk, bzone_id); return -EIO; } bzone = dmz_get(zmd, bzone_id); - if (!dmz_is_rnd(bzone)) { - dmz_dev_err(dev, "Chunk %u mapping: invalid buffer zone %u", + if (!bzone) { + dmz_zmd_err(zmd, "Chunk %u mapping: buffer zone %u not present", + chunk, bzone_id); + return -EIO; + } + if (!dmz_is_rnd(bzone) && !dmz_is_cache(bzone)) { + dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone %u", chunk, bzone_id); return -EIO; } @@ -1385,7 +1773,10 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) bzone->bzone = dzone; dzone->bzone = bzone; dmz_get_zone_weight(zmd, bzone); - list_add_tail(&bzone->link, &zmd->map_rnd_list); + if (dmz_is_cache(bzone)) + list_add_tail(&bzone->link, &zmd->map_cache_list); + else + list_add_tail(&bzone->link, &bzone->dev->map_rnd_list); next: chunk++; e++; @@ -1398,15 +1789,21 @@ next: * fully initialized. All remaining zones are unmapped data * zones. Finish initializing those here. 
*/ - for (i = 0; i < dev->nr_zones; i++) { + for (i = 0; i < zmd->nr_zones; i++) { dzone = dmz_get(zmd, i); + if (!dzone) + continue; if (dmz_is_meta(dzone)) continue; + if (dmz_is_offline(dzone)) + continue; - if (dmz_is_rnd(dzone)) - zmd->nr_rnd++; + if (dmz_is_cache(dzone)) + zmd->nr_cache++; + else if (dmz_is_rnd(dzone)) + dzone->dev->nr_rnd++; else - zmd->nr_seq++; + dzone->dev->nr_seq++; if (dmz_is_data(dzone)) { /* Already initialized */ @@ -1416,16 +1813,22 @@ next: /* Unmapped data zone */ set_bit(DMZ_DATA, &dzone->flags); dzone->chunk = DMZ_MAP_UNMAPPED; - if (dmz_is_rnd(dzone)) { - list_add_tail(&dzone->link, &zmd->unmap_rnd_list); - atomic_inc(&zmd->unmap_nr_rnd); + if (dmz_is_cache(dzone)) { + list_add_tail(&dzone->link, &zmd->unmap_cache_list); + atomic_inc(&zmd->unmap_nr_cache); + } else if (dmz_is_rnd(dzone)) { + list_add_tail(&dzone->link, + &dzone->dev->unmap_rnd_list); + atomic_inc(&dzone->dev->unmap_nr_rnd); } else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) { list_add_tail(&dzone->link, &zmd->reserved_seq_zones_list); + set_bit(DMZ_RESERVED, &dzone->flags); atomic_inc(&zmd->nr_reserved_seq_zones); - zmd->nr_seq--; + dzone->dev->nr_seq--; } else { - list_add_tail(&dzone->link, &zmd->unmap_seq_list); - atomic_inc(&zmd->unmap_nr_seq); + list_add_tail(&dzone->link, + &dzone->dev->unmap_seq_list); + atomic_inc(&dzone->dev->unmap_nr_seq); } } @@ -1459,10 +1862,13 @@ static void __dmz_lru_zone(struct dmz_metadata *zmd, struct dm_zone *zone) list_del_init(&zone->link); if (dmz_is_seq(zone)) { /* LRU rotate sequential zone */ - list_add_tail(&zone->link, &zmd->map_seq_list); + list_add_tail(&zone->link, &zone->dev->map_seq_list); + } else if (dmz_is_cache(zone)) { + /* LRU rotate cache zone */ + list_add_tail(&zone->link, &zmd->map_cache_list); } else { /* LRU rotate random zone */ - list_add_tail(&zone->link, &zmd->map_rnd_list); + list_add_tail(&zone->link, &zone->dev->map_rnd_list); } } @@ -1529,58 +1935,76 @@ static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone) { dmz_unlock_map(zmd); dmz_unlock_metadata(zmd); + set_bit(DMZ_RECLAIM_TERMINATE, &zone->flags); wait_on_bit_timeout(&zone->flags, DMZ_RECLAIM, TASK_UNINTERRUPTIBLE, HZ); + clear_bit(DMZ_RECLAIM_TERMINATE, &zone->flags); dmz_lock_metadata(zmd); dmz_lock_map(zmd); } /* - * Select a random write zone for reclaim. + * Select a cache or random write zone for reclaim. 
*/ -static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) +static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, + unsigned int idx, bool idle) { struct dm_zone *dzone = NULL; - struct dm_zone *zone; - - if (list_empty(&zmd->map_rnd_list)) - return ERR_PTR(-EBUSY); + struct dm_zone *zone, *last = NULL; + struct list_head *zone_list; + + /* If we have cache zones select from the cache zone list */ + if (zmd->nr_cache) { + zone_list = &zmd->map_cache_list; + /* Try to relaim random zones, too, when idle */ + if (idle && list_empty(zone_list)) + zone_list = &zmd->dev[idx].map_rnd_list; + } else + zone_list = &zmd->dev[idx].map_rnd_list; - list_for_each_entry(zone, &zmd->map_rnd_list, link) { - if (dmz_is_buf(zone)) + list_for_each_entry(zone, zone_list, link) { + if (dmz_is_buf(zone)) { dzone = zone->bzone; - else + if (dzone->dev->dev_idx != idx) + continue; + if (!last) { + last = dzone; + continue; + } + if (last->weight < dzone->weight) + continue; + dzone = last; + } else dzone = zone; if (dmz_lock_zone_reclaim(dzone)) return dzone; } - return ERR_PTR(-EBUSY); + return NULL; } /* * Select a buffered sequential zone for reclaim. */ -static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) +static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd, + unsigned int idx) { struct dm_zone *zone; - if (list_empty(&zmd->map_seq_list)) - return ERR_PTR(-EBUSY); - - list_for_each_entry(zone, &zmd->map_seq_list, link) { + list_for_each_entry(zone, &zmd->dev[idx].map_seq_list, link) { if (!zone->bzone) continue; if (dmz_lock_zone_reclaim(zone)) return zone; } - return ERR_PTR(-EBUSY); + return NULL; } /* * Select a zone for reclaim. */ -struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd) +struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, + unsigned int dev_idx, bool idle) { struct dm_zone *zone; @@ -1594,9 +2018,9 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd) */ dmz_lock_map(zmd); if (list_empty(&zmd->reserved_seq_zones_list)) - zone = dmz_get_seq_zone_for_reclaim(zmd); + zone = dmz_get_seq_zone_for_reclaim(zmd, dev_idx); else - zone = dmz_get_rnd_zone_for_reclaim(zmd); + zone = dmz_get_rnd_zone_for_reclaim(zmd, dev_idx, idle); dmz_unlock_map(zmd); return zone; @@ -1616,6 +2040,7 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu unsigned int dzone_id; struct dm_zone *dzone = NULL; int ret = 0; + int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND; dmz_lock_map(zmd); again: @@ -1630,9 +2055,9 @@ again: goto out; /* Allocate a random zone */ - dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); + dzone = dmz_alloc_zone(zmd, 0, alloc_flags); if (!dzone) { - if (dmz_bdev_is_dying(zmd->dev)) { + if (dmz_dev_is_dying(zmd)) { dzone = ERR_PTR(-EIO); goto out; } @@ -1645,6 +2070,10 @@ again: } else { /* The chunk is already mapped: get the mapping zone */ dzone = dmz_get(zmd, dzone_id); + if (!dzone) { + dzone = ERR_PTR(-EIO); + goto out; + } if (dzone->chunk != chunk) { dzone = ERR_PTR(-EIO); goto out; @@ -1723,6 +2152,7 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd, struct dm_zone *dzone) { struct dm_zone *bzone; + int alloc_flags = zmd->nr_cache ? 
DMZ_ALLOC_CACHE : DMZ_ALLOC_RND; dmz_lock_map(zmd); again: @@ -1731,9 +2161,9 @@ again: goto out; /* Allocate a random zone */ - bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); + bzone = dmz_alloc_zone(zmd, 0, alloc_flags); if (!bzone) { - if (dmz_bdev_is_dying(zmd->dev)) { + if (dmz_dev_is_dying(zmd)) { bzone = ERR_PTR(-EIO); goto out; } @@ -1742,14 +2172,16 @@ again: } /* Update the chunk mapping */ - dmz_set_chunk_mapping(zmd, dzone->chunk, dmz_id(zmd, dzone), - dmz_id(zmd, bzone)); + dmz_set_chunk_mapping(zmd, dzone->chunk, dzone->id, bzone->id); set_bit(DMZ_BUF, &bzone->flags); bzone->chunk = dzone->chunk; bzone->bzone = dzone; dzone->bzone = bzone; - list_add_tail(&bzone->link, &zmd->map_rnd_list); + if (dmz_is_cache(bzone)) + list_add_tail(&bzone->link, &zmd->map_cache_list); + else + list_add_tail(&bzone->link, &bzone->dev->map_rnd_list); out: dmz_unlock_map(zmd); @@ -1760,46 +2192,68 @@ out: * Get an unmapped (free) zone. * This must be called with the mapping lock held. */ -struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags) +struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned int dev_idx, + unsigned long flags) { struct list_head *list; struct dm_zone *zone; + int i = 0; - if (flags & DMZ_ALLOC_RND) - list = &zmd->unmap_rnd_list; - else - list = &zmd->unmap_seq_list; again: + if (flags & DMZ_ALLOC_CACHE) + list = &zmd->unmap_cache_list; + else if (flags & DMZ_ALLOC_RND) + list = &zmd->dev[dev_idx].unmap_rnd_list; + else + list = &zmd->dev[dev_idx].unmap_seq_list; + if (list_empty(list)) { /* - * No free zone: if this is for reclaim, allow using the - * reserved sequential zones. + * No free zone: return NULL if this is for not reclaim. */ - if (!(flags & DMZ_ALLOC_RECLAIM) || - list_empty(&zmd->reserved_seq_zones_list)) + if (!(flags & DMZ_ALLOC_RECLAIM)) return NULL; + /* + * Try to allocate from other devices + */ + if (i < zmd->nr_devs) { + dev_idx = (dev_idx + 1) % zmd->nr_devs; + i++; + goto again; + } - zone = list_first_entry(&zmd->reserved_seq_zones_list, - struct dm_zone, link); - list_del_init(&zone->link); - atomic_dec(&zmd->nr_reserved_seq_zones); + /* + * Fallback to the reserved sequential zones + */ + zone = list_first_entry_or_null(&zmd->reserved_seq_zones_list, + struct dm_zone, link); + if (zone) { + list_del_init(&zone->link); + atomic_dec(&zmd->nr_reserved_seq_zones); + } return zone; } zone = list_first_entry(list, struct dm_zone, link); list_del_init(&zone->link); - if (dmz_is_rnd(zone)) - atomic_dec(&zmd->unmap_nr_rnd); + if (dmz_is_cache(zone)) + atomic_dec(&zmd->unmap_nr_cache); + else if (dmz_is_rnd(zone)) + atomic_dec(&zone->dev->unmap_nr_rnd); else - atomic_dec(&zmd->unmap_nr_seq); + atomic_dec(&zone->dev->unmap_nr_seq); if (dmz_is_offline(zone)) { - dmz_dev_warn(zmd->dev, "Zone %u is offline", dmz_id(zmd, zone)); + dmz_zmd_warn(zmd, "Zone %u is offline", zone->id); + zone = NULL; + goto again; + } + if (dmz_is_meta(zone)) { + dmz_zmd_warn(zmd, "Zone %u has metadata", zone->id); zone = NULL; goto again; } - return zone; } @@ -1814,16 +2268,18 @@ void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone) dmz_reset_zone(zmd, zone); /* Return the zone to its type unmap list */ - if (dmz_is_rnd(zone)) { - list_add_tail(&zone->link, &zmd->unmap_rnd_list); - atomic_inc(&zmd->unmap_nr_rnd); - } else if (atomic_read(&zmd->nr_reserved_seq_zones) < - zmd->nr_reserved_seq) { + if (dmz_is_cache(zone)) { + list_add_tail(&zone->link, &zmd->unmap_cache_list); + atomic_inc(&zmd->unmap_nr_cache); + } else if 
(dmz_is_rnd(zone)) { + list_add_tail(&zone->link, &zone->dev->unmap_rnd_list); + atomic_inc(&zone->dev->unmap_nr_rnd); + } else if (dmz_is_reserved(zone)) { list_add_tail(&zone->link, &zmd->reserved_seq_zones_list); atomic_inc(&zmd->nr_reserved_seq_zones); } else { - list_add_tail(&zone->link, &zmd->unmap_seq_list); - atomic_inc(&zmd->unmap_nr_seq); + list_add_tail(&zone->link, &zone->dev->unmap_seq_list); + atomic_inc(&zone->dev->unmap_nr_seq); } wake_up_all(&zmd->free_wq); @@ -1837,13 +2293,15 @@ void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *dzone, unsigned int chunk) { /* Set the chunk mapping */ - dmz_set_chunk_mapping(zmd, chunk, dmz_id(zmd, dzone), + dmz_set_chunk_mapping(zmd, chunk, dzone->id, DMZ_MAP_UNMAPPED); dzone->chunk = chunk; - if (dmz_is_rnd(dzone)) - list_add_tail(&dzone->link, &zmd->map_rnd_list); + if (dmz_is_cache(dzone)) + list_add_tail(&dzone->link, &zmd->map_cache_list); + else if (dmz_is_rnd(dzone)) + list_add_tail(&dzone->link, &dzone->dev->map_rnd_list); else - list_add_tail(&dzone->link, &zmd->map_seq_list); + list_add_tail(&dzone->link, &dzone->dev->map_seq_list); } /* @@ -1865,7 +2323,7 @@ void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone) * Unmapping the chunk buffer zone: clear only * the chunk buffer mapping */ - dzone_id = dmz_id(zmd, zone->bzone); + dzone_id = zone->bzone->id; zone->bzone->bzone = NULL; zone->bzone = NULL; @@ -1927,7 +2385,7 @@ static struct dmz_mblock *dmz_get_bitmap(struct dmz_metadata *zmd, sector_t chunk_block) { sector_t bitmap_block = 1 + zmd->nr_map_blocks + - (sector_t)(dmz_id(zmd, zone) * zmd->zone_nr_bitmap_blocks) + + (sector_t)(zone->id * zmd->zone_nr_bitmap_blocks) + (chunk_block >> DMZ_BLOCK_SHIFT_BITS); return dmz_get_mblock(zmd, bitmap_block); @@ -1943,7 +2401,7 @@ int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, sector_t chunk_block = 0; /* Get the zones bitmap blocks */ - while (chunk_block < zmd->dev->zone_nr_blocks) { + while (chunk_block < zmd->zone_nr_blocks) { from_mblk = dmz_get_bitmap(zmd, from_zone, chunk_block); if (IS_ERR(from_mblk)) return PTR_ERR(from_mblk); @@ -1978,7 +2436,7 @@ int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, int ret; /* Get the zones bitmap blocks */ - while (chunk_block < zmd->dev->zone_nr_blocks) { + while (chunk_block < zmd->zone_nr_blocks) { /* Get a valid region from the source zone */ ret = dmz_first_valid_block(zmd, from_zone, &chunk_block); if (ret <= 0) @@ -2002,12 +2460,12 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, sector_t chunk_block, unsigned int nr_blocks) { unsigned int count, bit, nr_bits; - unsigned int zone_nr_blocks = zmd->dev->zone_nr_blocks; + unsigned int zone_nr_blocks = zmd->zone_nr_blocks; struct dmz_mblock *mblk; unsigned int n = 0; - dmz_dev_debug(zmd->dev, "=> VALIDATE zone %u, block %llu, %u blocks", - dmz_id(zmd, zone), (unsigned long long)chunk_block, + dmz_zmd_debug(zmd, "=> VALIDATE zone %u, block %llu, %u blocks", + zone->id, (unsigned long long)chunk_block, nr_blocks); WARN_ON(chunk_block + nr_blocks > zone_nr_blocks); @@ -2036,8 +2494,8 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, if (likely(zone->weight + n <= zone_nr_blocks)) zone->weight += n; else { - dmz_dev_warn(zmd->dev, "Zone %u: weight %u should be <= %u", - dmz_id(zmd, zone), zone->weight, + dmz_zmd_warn(zmd, "Zone %u: weight %u should be <= %u", + zone->id, zone->weight, zone_nr_blocks - n); zone->weight = zone_nr_blocks; } @@ -2086,10 
+2544,10 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, struct dmz_mblock *mblk; unsigned int n = 0; - dmz_dev_debug(zmd->dev, "=> INVALIDATE zone %u, block %llu, %u blocks", - dmz_id(zmd, zone), (u64)chunk_block, nr_blocks); + dmz_zmd_debug(zmd, "=> INVALIDATE zone %u, block %llu, %u blocks", + zone->id, (u64)chunk_block, nr_blocks); - WARN_ON(chunk_block + nr_blocks > zmd->dev->zone_nr_blocks); + WARN_ON(chunk_block + nr_blocks > zmd->zone_nr_blocks); while (nr_blocks) { /* Get bitmap block */ @@ -2116,8 +2574,8 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, if (zone->weight >= n) zone->weight -= n; else { - dmz_dev_warn(zmd->dev, "Zone %u: weight %u should be >= %u", - dmz_id(zmd, zone), zone->weight, n); + dmz_zmd_warn(zmd, "Zone %u: weight %u should be >= %u", + zone->id, zone->weight, n); zone->weight = 0; } @@ -2133,7 +2591,7 @@ static int dmz_test_block(struct dmz_metadata *zmd, struct dm_zone *zone, struct dmz_mblock *mblk; int ret; - WARN_ON(chunk_block >= zmd->dev->zone_nr_blocks); + WARN_ON(chunk_block >= zmd->zone_nr_blocks); /* Get bitmap block */ mblk = dmz_get_bitmap(zmd, zone, chunk_block); @@ -2163,7 +2621,7 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, unsigned long *bitmap; int n = 0; - WARN_ON(chunk_block + nr_blocks > zmd->dev->zone_nr_blocks); + WARN_ON(chunk_block + nr_blocks > zmd->zone_nr_blocks); while (nr_blocks) { /* Get bitmap block */ @@ -2207,7 +2665,7 @@ int dmz_block_valid(struct dmz_metadata *zmd, struct dm_zone *zone, /* The block is valid: get the number of valid blocks from block */ return dmz_to_next_set_block(zmd, zone, chunk_block, - zmd->dev->zone_nr_blocks - chunk_block, 0); + zmd->zone_nr_blocks - chunk_block, 0); } /* @@ -2223,7 +2681,7 @@ int dmz_first_valid_block(struct dmz_metadata *zmd, struct dm_zone *zone, int ret; ret = dmz_to_next_set_block(zmd, zone, start_block, - zmd->dev->zone_nr_blocks - start_block, 1); + zmd->zone_nr_blocks - start_block, 1); if (ret < 0) return ret; @@ -2231,7 +2689,7 @@ int dmz_first_valid_block(struct dmz_metadata *zmd, struct dm_zone *zone, *chunk_block = start_block; return dmz_to_next_set_block(zmd, zone, start_block, - zmd->dev->zone_nr_blocks - start_block, 0); + zmd->zone_nr_blocks - start_block, 0); } /* @@ -2270,7 +2728,7 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone) struct dmz_mblock *mblk; sector_t chunk_block = 0; unsigned int bit, nr_bits; - unsigned int nr_blocks = zmd->dev->zone_nr_blocks; + unsigned int nr_blocks = zmd->zone_nr_blocks; void *bitmap; int n = 0; @@ -2326,7 +2784,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) while (!list_empty(&zmd->mblk_dirty_list)) { mblk = list_first_entry(&zmd->mblk_dirty_list, struct dmz_mblock, link); - dmz_dev_warn(zmd->dev, "mblock %llu still in dirty list (ref %u)", + dmz_zmd_warn(zmd, "mblock %llu still in dirty list (ref %u)", (u64)mblk->no, mblk->ref); list_del_init(&mblk->link); rb_erase(&mblk->node, &zmd->mblk_rbtree); @@ -2344,7 +2802,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) /* Sanity checks: the mblock rbtree should now be empty */ root = &zmd->mblk_rbtree; rbtree_postorder_for_each_entry_safe(mblk, next, root, node) { - dmz_dev_warn(zmd->dev, "mblock %llu ref %u still in rbtree", + dmz_zmd_warn(zmd, "mblock %llu ref %u still in rbtree", (u64)mblk->no, mblk->ref); mblk->ref = 0; dmz_free_mblock(zmd, mblk); @@ -2357,13 +2815,42 @@ static void dmz_cleanup_metadata(struct 
dmz_metadata *zmd) mutex_destroy(&zmd->map_lock); } +static void dmz_print_dev(struct dmz_metadata *zmd, int num) +{ + struct dmz_dev *dev = &zmd->dev[num]; + + if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) + dmz_dev_info(dev, "Regular block device"); + else + dmz_dev_info(dev, "Host-%s zoned block device", + bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? + "aware" : "managed"); + if (zmd->sb_version > 1) { + sector_t sector_offset = + dev->zone_offset << zmd->zone_nr_sectors_shift; + + dmz_dev_info(dev, " %llu 512-byte logical sectors (offset %llu)", + (u64)dev->capacity, (u64)sector_offset); + dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors (offset %llu)", + dev->nr_zones, (u64)zmd->zone_nr_sectors, + (u64)dev->zone_offset); + } else { + dmz_dev_info(dev, " %llu 512-byte logical sectors", + (u64)dev->capacity); + dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", + dev->nr_zones, (u64)zmd->zone_nr_sectors); + } +} + /* * Initialize the zoned metadata. */ -int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) +int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, + struct dmz_metadata **metadata, + const char *devname) { struct dmz_metadata *zmd; - unsigned int i, zid; + unsigned int i; struct dm_zone *zone; int ret; @@ -2371,7 +2858,9 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) if (!zmd) return -ENOMEM; + strcpy(zmd->devname, devname); zmd->dev = dev; + zmd->nr_devs = num_dev; zmd->mblk_rbtree = RB_ROOT; init_rwsem(&zmd->mblk_sem); mutex_init(&zmd->mblk_flush_lock); @@ -2380,13 +2869,10 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) INIT_LIST_HEAD(&zmd->mblk_dirty_list); mutex_init(&zmd->map_lock); - atomic_set(&zmd->unmap_nr_rnd, 0); - INIT_LIST_HEAD(&zmd->unmap_rnd_list); - INIT_LIST_HEAD(&zmd->map_rnd_list); - atomic_set(&zmd->unmap_nr_seq, 0); - INIT_LIST_HEAD(&zmd->unmap_seq_list); - INIT_LIST_HEAD(&zmd->map_seq_list); + atomic_set(&zmd->unmap_nr_cache, 0); + INIT_LIST_HEAD(&zmd->unmap_cache_list); + INIT_LIST_HEAD(&zmd->map_cache_list); atomic_set(&zmd->nr_reserved_seq_zones, 0); INIT_LIST_HEAD(&zmd->reserved_seq_zones_list); @@ -2404,14 +2890,22 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) goto err; /* Set metadata zones starting from sb_zone */ - zid = dmz_id(zmd, zmd->sb_zone); for (i = 0; i < zmd->nr_meta_zones << 1; i++) { - zone = dmz_get(zmd, zid + i); - if (!dmz_is_rnd(zone)) + zone = dmz_get(zmd, zmd->sb[0].zone->id + i); + if (!zone) { + dmz_zmd_err(zmd, + "metadata zone %u not present", i); + ret = -ENXIO; + goto err; + } + if (!dmz_is_rnd(zone) && !dmz_is_cache(zone)) { + dmz_zmd_err(zmd, + "metadata zone %d is not random", i); + ret = -ENXIO; goto err; + } set_bit(DMZ_META, &zone->flags); } - /* Load mapping table */ ret = dmz_load_mapping(zmd); if (ret) @@ -2432,34 +2926,38 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) /* Metadata cache shrinker */ ret = register_shrinker(&zmd->mblk_shrinker); if (ret) { - dmz_dev_err(dev, "Register metadata cache shrinker failed"); + dmz_zmd_err(zmd, "Register metadata cache shrinker failed"); goto err; } - dmz_dev_info(dev, "Host-%s zoned block device", - bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? 
- "aware" : "managed"); - dmz_dev_info(dev, " %llu 512-byte logical sectors", - (u64)dev->capacity); - dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", - dev->nr_zones, (u64)dev->zone_nr_sectors); - dmz_dev_info(dev, " %u metadata zones", - zmd->nr_meta_zones * 2); - dmz_dev_info(dev, " %u data zones for %u chunks", - zmd->nr_data_zones, zmd->nr_chunks); - dmz_dev_info(dev, " %u random zones (%u unmapped)", - zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd)); - dmz_dev_info(dev, " %u sequential zones (%u unmapped)", - zmd->nr_seq, atomic_read(&zmd->unmap_nr_seq)); - dmz_dev_info(dev, " %u reserved sequential data zones", - zmd->nr_reserved_seq); - - dmz_dev_debug(dev, "Format:"); - dmz_dev_debug(dev, "%u metadata blocks per set (%u max cache)", + dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version); + for (i = 0; i < zmd->nr_devs; i++) + dmz_print_dev(zmd, i); + + dmz_zmd_info(zmd, " %u zones of %llu 512-byte logical sectors", + zmd->nr_zones, (u64)zmd->zone_nr_sectors); + dmz_zmd_debug(zmd, " %u metadata zones", + zmd->nr_meta_zones * 2); + dmz_zmd_debug(zmd, " %u data zones for %u chunks", + zmd->nr_data_zones, zmd->nr_chunks); + dmz_zmd_debug(zmd, " %u cache zones (%u unmapped)", + zmd->nr_cache, atomic_read(&zmd->unmap_nr_cache)); + for (i = 0; i < zmd->nr_devs; i++) { + dmz_zmd_debug(zmd, " %u random zones (%u unmapped)", + dmz_nr_rnd_zones(zmd, i), + dmz_nr_unmap_rnd_zones(zmd, i)); + dmz_zmd_debug(zmd, " %u sequential zones (%u unmapped)", + dmz_nr_seq_zones(zmd, i), + dmz_nr_unmap_seq_zones(zmd, i)); + } + dmz_zmd_debug(zmd, " %u reserved sequential data zones", + zmd->nr_reserved_seq); + dmz_zmd_debug(zmd, "Format:"); + dmz_zmd_debug(zmd, "%u metadata blocks per set (%u max cache)", zmd->nr_meta_blocks, zmd->max_nr_mblks); - dmz_dev_debug(dev, " %u data zone mapping blocks", + dmz_zmd_debug(zmd, " %u data zone mapping blocks", zmd->nr_map_blocks); - dmz_dev_debug(dev, " %u bitmap blocks", + dmz_zmd_debug(zmd, " %u bitmap blocks", zmd->nr_bitmap_blocks); *metadata = zmd; @@ -2488,30 +2986,28 @@ void dmz_dtr_metadata(struct dmz_metadata *zmd) */ int dmz_resume_metadata(struct dmz_metadata *zmd) { - struct dmz_dev *dev = zmd->dev; struct dm_zone *zone; sector_t wp_block; unsigned int i; int ret; /* Check zones */ - for (i = 0; i < dev->nr_zones; i++) { + for (i = 0; i < zmd->nr_zones; i++) { zone = dmz_get(zmd, i); if (!zone) { - dmz_dev_err(dev, "Unable to get zone %u", i); + dmz_zmd_err(zmd, "Unable to get zone %u", i); return -EIO; } - wp_block = zone->wp_block; ret = dmz_update_zone(zmd, zone); if (ret) { - dmz_dev_err(dev, "Broken zone %u", i); + dmz_zmd_err(zmd, "Broken zone %u", i); return ret; } if (dmz_is_offline(zone)) { - dmz_dev_warn(dev, "Zone %u is offline", i); + dmz_zmd_warn(zmd, "Zone %u is offline", i); continue; } @@ -2519,11 +3015,11 @@ int dmz_resume_metadata(struct dmz_metadata *zmd) if (!dmz_is_seq(zone)) zone->wp_block = 0; else if (zone->wp_block != wp_block) { - dmz_dev_err(dev, "Zone %u: Invalid wp (%llu / %llu)", + dmz_zmd_err(zmd, "Zone %u: Invalid wp (%llu / %llu)", i, (u64)zone->wp_block, (u64)wp_block); zone->wp_block = wp_block; dmz_invalidate_blocks(zmd, zone, zone->wp_block, - dev->zone_nr_blocks - zone->wp_block); + zmd->zone_nr_blocks - zone->wp_block); } } |