author		Linus Torvalds <torvalds@linux-foundation.org>	2022-01-12 15:46:11 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-01-12 15:46:11 -0800
commit		3acbdbf42e943d85174401357a6b6243479d4c76 (patch)
tree		94d092eedc0e24f611a14a4fcceb9d3643b7ac25 /fs
parent		8834147f9505661859ce44549bf601e2a06bba7c (diff)
parent		9e05e95ca8dae8de4a7a1645014e1bbd9c8a4dab (diff)
Merge tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull dax and libnvdimm updates from Dan Williams:
"The bulk of this is a rework of the dax_operations API after
discovering the obstacles it posed to the work-in-progress DAX+reflink
support for XFS and other copy-on-write filesystem mechanics.
The main sticking point was the need to plumb a block_device through
the API to handle partition offsets; Christoph untangled that
dependency, along with other cleanups, to make landing the
DAX+reflink support easier.
The DEV_DAX_PMEM_COMPAT option has been around for 4 years; not only
are distributions shipping userspace that understands the current
configuration API, some are not even bothering to turn the option on
anymore, so it seems a good time to remove it per the deprecation
schedule. Recall that this was added after the device-dax subsystem
moved from /sys/class/dax to /sys/bus/dax for its sysfs organization.
All recent functionality depends on /sys/bus/dax.
Some other miscellaneous cleanups and reflink prep patches are
included as well.
Summary:
- Simplify the dax_operations API:
- Eliminate bdev_dax_pgoff() in favor of the filesystem
maintaining and applying a partition offset to all its DAX iomap
operations.
- Remove wrappers and device-mapper stacked callbacks for
->copy_from_iter() and ->copy_to_iter() in favor of moving
block_device relative offset responsibility to the
dax_direct_access() caller.
- Remove the need for an @bdev in filesystem-DAX infrastructure
- Remove unused uio helpers copy_from_iter_flushcache() and
copy_mc_to_iter() as only the non-check_copy_size() versions are
used for DAX.
- Prepare XFS for the pending (next merge window) DAX+reflink support
- Remove deprecated DEV_DAX_PMEM_COMPAT support
- Cleanup a straggling misuse of the GUID api"
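
The filesystem-side pattern that the first two summary points converge on
looks roughly like the sketch below, modeled on the ext2 and erofs
conversions in the diff at the end of this page. IOMAP_DAX, struct iomap,
and the two-argument fs_dax_get_by_bdev() are the interfaces introduced or
reworked by this series; example_sb_info and example_map_block() are
hypothetical stand-ins for filesystem-private code.

/*
 * Hedged sketch of the new DAX iomap pattern.  At mount time the
 * filesystem caches the dax_device and the partition start that
 * fs_dax_get_by_bdev() now reports:
 *
 *	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
 *
 * ->iomap_begin then hands out either a dax_device or a block_device
 * and applies the partition offset itself, replacing the removed
 * bdev_dax_pgoff().  example_sb_info/example_map_block() are invented.
 */
static int example_iomap_begin(struct inode *inode, loff_t offset,
		loff_t length, unsigned int flags, struct iomap *iomap,
		struct iomap *srcmap)
{
	struct example_sb_info *sbi = inode->i_sb->s_fs_info;

	iomap->offset = offset;
	iomap->length = length;
	iomap->type = IOMAP_MAPPED;
	iomap->addr = example_map_block(inode, offset);	/* byte address */

	if (flags & IOMAP_DAX) {
		/* DAX callers get the dax_device, offset by the partition start. */
		iomap->dax_dev = sbi->s_daxdev;
		iomap->addr += sbi->s_dax_part_off;
	} else {
		iomap->bdev = inode->i_sb->s_bdev;
	}
	return 0;
}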
* tag 'libnvdimm-for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (38 commits)
iomap: Fix error handling in iomap_zero_iter()
ACPI: NFIT: Import GUID before use
dax: remove the copy_from_iter and copy_to_iter methods
dax: remove the DAXDEV_F_SYNC flag
dax: simplify dax_synchronous and set_dax_synchronous
uio: remove copy_from_iter_flushcache() and copy_mc_to_iter()
iomap: turn the byte variable in iomap_zero_iter into a ssize_t
memremap: remove support for external pgmap refcounts
fsdax: don't require CONFIG_BLOCK
iomap: build the block based code conditionally
dax: fix up some of the block device related ifdefs
fsdax: shift partition offset handling into the file systems
dax: return the partition offset from fs_dax_get_by_bdev
iomap: add a IOMAP_DAX flag
xfs: pass the mapping flags to xfs_bmbt_to_iomap
xfs: use xfs_direct_write_iomap_ops for DAX zeroing
xfs: move dax device handling into xfs_{alloc,free}_buftarg
ext4: cleanup the dax handling in ext4_fill_super
ext2: cleanup the dax handling in ext2_fill_super
fsdax: decouple zeroing from the iomap buffered I/O code
...
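
Among the commits above, "fsdax: decouple zeroing from the iomap buffered
I/O code" removes the DAX special case from iomap_zero_iter() in favor of
dedicated dax_zero_range()/dax_truncate_page() entry points. Below is a
hedged usage sketch of the resulting dispatch, modeled on the
xfs_zero_range() wrapper and the ext2/ext4 call sites in the diff further
down; example_iomap_ops is a hypothetical placeholder for a filesystem's
own iomap_ops.

/*
 * Sketch: after this series, zeroing dispatches explicitly between the
 * DAX and buffered-I/O paths instead of branching inside iomap.
 * example_iomap_ops is invented; the helpers are the real new API.
 */
static int example_zero_range(struct inode *inode, loff_t pos, loff_t len,
		bool *did_zero)
{
	if (IS_DAX(inode))
		return dax_zero_range(inode, pos, len, did_zero,
				      &example_iomap_ops);
	return iomap_zero_range(inode, pos, len, did_zero,
				&example_iomap_ops);
}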
Diffstat (limited to 'fs')
 fs/Kconfig               |   8
 fs/dax.c                 | 157
 fs/erofs/data.c          |  11
 fs/erofs/internal.h      |   3
 fs/erofs/super.c         |  15
 fs/ext2/ext2.h           |   1
 fs/ext2/inode.c          |  15
 fs/ext2/super.c          |  16
 fs/ext4/ext4.h           |   1
 fs/ext4/inode.c          |  25
 fs/ext4/super.c          |  11
 fs/fuse/Kconfig          |   2
 fs/fuse/virtio_fs.c      |  18
 fs/iomap/Makefile        |   4
 fs/iomap/buffered-io.c   |  10
 fs/xfs/libxfs/xfs_bmap.c |   4
 fs/xfs/xfs_aops.c        |   2
 fs/xfs/xfs_bmap_util.c   |   7
 fs/xfs/xfs_buf.c         |   8
 fs/xfs/xfs_buf.h         |   5
 fs/xfs/xfs_file.c        |   3
 fs/xfs/xfs_iomap.c       |  84
 fs/xfs/xfs_iomap.h       |  12
 fs/xfs/xfs_iops.c        |   7
 fs/xfs/xfs_pnfs.c        |   4
 fs/xfs/xfs_reflink.c     |   3
 fs/xfs/xfs_super.c       |  80
 27 files changed, 288 insertions(+), 228 deletions(-)
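
The heart of the rework is the new pgoff calculation in fs/dax.c, which
replaces the sector-based dax_iomap_sector()/bdev_dax_pgoff() pair.
Because the filesystem has already folded the partition start into
iomap->addr, the dax core can translate a file position directly into a
page offset within the dax_device. The helper from the first fs/dax.c
hunk below, with explanatory comments added:

static pgoff_t dax_iomap_pgoff(const struct iomap *iomap, loff_t pos)
{
	/*
	 * iomap->addr is the byte address of the mapped extent within the
	 * dax_device (partition offset already applied by the filesystem);
	 * (pos & PAGE_MASK) - iomap->offset is the page-aligned distance
	 * of pos into that extent; PHYS_PFN() converts bytes to pages.
	 */
	return PHYS_PFN(iomap->addr + (pos & PAGE_MASK) - iomap->offset);
}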
diff --git a/fs/Kconfig b/fs/Kconfig
index a6313a969bc5..7a2b11c0b803 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -15,11 +15,11 @@ config VALIDATE_FS_PARSER
	  Enable this to perform validation of the parameter description for a
	  filesystem when it is registered.
 
-if BLOCK
-
 config FS_IOMAP
 	bool
 
+if BLOCK
+
 source "fs/ext2/Kconfig"
 source "fs/ext4/Kconfig"
 source "fs/jbd2/Kconfig"
@@ -42,6 +42,8 @@ source "fs/nilfs2/Kconfig"
 source "fs/f2fs/Kconfig"
 source "fs/zonefs/Kconfig"
 
+endif # BLOCK
+
 config FS_DAX
 	bool "File system based Direct Access (DAX) support"
 	depends on MMU
@@ -89,8 +91,6 @@ config FS_DAX_PMD
 config FS_DAX_LIMITED
 	bool
 
-endif # BLOCK
-
 # Posix ACL utility routines
 #
 # Note: Posix ACLs can be implemented without these helpers.  Never use
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -709,26 +709,26 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 	return __dax_invalidate_entry(mapping, index, false);
 }
 
-static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev,
-			     sector_t sector, struct page *to, unsigned long vaddr)
+static pgoff_t dax_iomap_pgoff(const struct iomap *iomap, loff_t pos)
 {
+	return PHYS_PFN(iomap->addr + (pos & PAGE_MASK) - iomap->offset);
+}
+
+static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter)
+{
+	pgoff_t pgoff = dax_iomap_pgoff(&iter->iomap, iter->pos);
 	void *vto, *kaddr;
-	pgoff_t pgoff;
 	long rc;
 	int id;
 
-	rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
-	if (rc)
-		return rc;
-
 	id = dax_read_lock();
-	rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+	rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
 	if (rc < 0) {
 		dax_read_unlock(id);
 		return rc;
 	}
-	vto = kmap_atomic(to);
-	copy_user_page(vto, (void __force *)kaddr, vaddr, to);
+	vto = kmap_atomic(vmf->cow_page);
+	copy_user_page(vto, kaddr, vmf->address, vmf->cow_page);
 	kunmap_atomic(vto);
 	dax_read_unlock(id);
 	return 0;
@@ -1005,22 +1005,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
-static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos)
-{
-	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
-}
-
 static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
 			 pfn_t *pfnp)
 {
-	const sector_t sector = dax_iomap_sector(iomap, pos);
-	pgoff_t pgoff;
+	pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
 	int id, rc;
 	long length;
 
-	rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
-	if (rc)
-		return rc;
 	id = dax_read_lock();
 	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
 				   NULL, pfnp);
@@ -1126,42 +1117,87 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 }
 #endif /* CONFIG_FS_DAX_PMD */
 
-s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
+static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
+		unsigned int offset, size_t size)
 {
-	sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
-	pgoff_t pgoff;
-	long rc, id;
 	void *kaddr;
-	bool page_aligned = false;
-	unsigned offset = offset_in_page(pos);
-	unsigned size = min_t(u64, PAGE_SIZE - offset, length);
+	long ret;
 
-	if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
-	    (size == PAGE_SIZE))
-		page_aligned = true;
+	ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+	if (ret > 0) {
+		memset(kaddr + offset, 0, size);
+		dax_flush(dax_dev, kaddr + offset, size);
+	}
+	return ret;
+}
 
-	rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
-	if (rc)
-		return rc;
+static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
+{
+	const struct iomap *iomap = &iter->iomap;
+	const struct iomap *srcmap = iomap_iter_srcmap(iter);
+	loff_t pos = iter->pos;
+	u64 length = iomap_length(iter);
+	s64 written = 0;
+
+	/* already zeroed?  we're done. */
+	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
+		return length;
+
+	do {
+		unsigned offset = offset_in_page(pos);
+		unsigned size = min_t(u64, PAGE_SIZE - offset, length);
+		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
+		long rc;
+		int id;
+
+		id = dax_read_lock();
+		if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
+			rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
+		else
+			rc = dax_memzero(iomap->dax_dev, pgoff, offset, size);
+		dax_read_unlock(id);
 
-	id = dax_read_lock();
+		if (rc < 0)
+			return rc;
+		pos += size;
+		length -= size;
+		written += size;
+		if (did_zero)
+			*did_zero = true;
+	} while (length > 0);
 
-	if (page_aligned)
-		rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
-	else
-		rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
-	if (rc < 0) {
-		dax_read_unlock(id);
-		return rc;
-	}
+	return written;
+}
 
-	if (!page_aligned) {
-		memset(kaddr + offset, 0, size);
-		dax_flush(iomap->dax_dev, kaddr + offset, size);
-	}
-	dax_read_unlock(id);
-	return size;
+int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
+		const struct iomap_ops *ops)
+{
+	struct iomap_iter iter = {
+		.inode		= inode,
+		.pos		= pos,
+		.len		= len,
+		.flags		= IOMAP_DAX | IOMAP_ZERO,
+	};
+	int ret;
+
+	while ((ret = iomap_iter(&iter, ops)) > 0)
+		iter.processed = dax_zero_iter(&iter, did_zero);
+	return ret;
 }
+EXPORT_SYMBOL_GPL(dax_zero_range);
+
+int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
+		const struct iomap_ops *ops)
+{
+	unsigned int blocksize = i_blocksize(inode);
+	unsigned int off = pos & (blocksize - 1);
+
+	/* Block boundary? Nothing to do */
+	if (!off)
+		return 0;
+	return dax_zero_range(inode, pos, blocksize - off, did_zero, ops);
+}
+EXPORT_SYMBOL_GPL(dax_truncate_page);
 
 static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		struct iov_iter *iter)
@@ -1169,7 +1205,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 	const struct iomap *iomap = &iomi->iomap;
 	loff_t length = iomap_length(iomi);
 	loff_t pos = iomi->pos;
-	struct block_device *bdev = iomap->bdev;
 	struct dax_device *dax_dev = iomap->dax_dev;
 	loff_t end = pos + length, done = 0;
 	ssize_t ret = 0;
@@ -1203,9 +1238,8 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 	while (pos < end) {
 		unsigned offset = pos & (PAGE_SIZE - 1);
 		const size_t size = ALIGN(length + offset, PAGE_SIZE);
-		const sector_t sector = dax_iomap_sector(iomap, pos);
+		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
 		ssize_t map_len;
-		pgoff_t pgoff;
 		void *kaddr;
 
 		if (fatal_signal_pending(current)) {
@@ -1213,10 +1247,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 			break;
 		}
 
-		ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
-		if (ret)
-			break;
-
 		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
 				&kaddr, NULL);
 		if (map_len < 0) {
@@ -1230,11 +1260,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		if (map_len > end - pos)
 			map_len = end - pos;
 
-		/*
-		 * The userspace address for the memory copy has already been
-		 * validated via access_ok() in either vfs_read() or
-		 * vfs_write(), depending on which operation we are doing.
-		 */
 		if (iov_iter_rw(iter) == WRITE)
 			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
 					map_len, iter);
@@ -1274,6 +1299,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		.inode		= iocb->ki_filp->f_mapping->host,
 		.pos		= iocb->ki_pos,
 		.len		= iov_iter_count(iter),
+		.flags		= IOMAP_DAX,
 	};
 	loff_t done = 0;
 	int ret;
@@ -1332,19 +1358,16 @@ static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
 static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
 		const struct iomap_iter *iter)
 {
-	sector_t sector = dax_iomap_sector(&iter->iomap, iter->pos);
-	unsigned long vaddr = vmf->address;
 	vm_fault_t ret;
 	int error = 0;
 
 	switch (iter->iomap.type) {
 	case IOMAP_HOLE:
 	case IOMAP_UNWRITTEN:
-		clear_user_highpage(vmf->cow_page, vaddr);
+		clear_user_highpage(vmf->cow_page, vmf->address);
 		break;
 	case IOMAP_MAPPED:
-		error = copy_cow_page_dax(iter->iomap.bdev, iter->iomap.dax_dev,
-					  sector, vmf->cow_page, vaddr);
+		error = copy_cow_page_dax(vmf, iter);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -1430,7 +1453,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		.inode		= mapping->host,
 		.pos		= (loff_t)vmf->pgoff << PAGE_SHIFT,
 		.len		= PAGE_SIZE,
-		.flags		= IOMAP_FAULT,
+		.flags		= IOMAP_DAX | IOMAP_FAULT,
 	};
 	vm_fault_t ret = 0;
 	void *entry;
@@ -1539,7 +1562,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	struct iomap_iter iter = {
 		.inode		= mapping->host,
 		.len		= PMD_SIZE,
-		.flags		= IOMAP_FAULT,
+		.flags		= IOMAP_DAX | IOMAP_FAULT,
 	};
 	vm_fault_t ret = VM_FAULT_FALLBACK;
 	pgoff_t max_pgoff;
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index e18476c85fa2..fa7ddb7ad980 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -192,6 +192,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
 	/* primary device by default */
 	map->m_bdev = sb->s_bdev;
 	map->m_daxdev = EROFS_SB(sb)->dax_dev;
+	map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
 
 	if (map->m_deviceid) {
 		down_read(&devs->rwsem);
@@ -202,6 +203,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
 		}
 		map->m_bdev = dif->bdev;
 		map->m_daxdev = dif->dax_dev;
+		map->m_dax_part_off = dif->dax_part_off;
 		up_read(&devs->rwsem);
 	} else if (devs->extra_devices) {
 		down_read(&devs->rwsem);
@@ -218,6 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
 				map->m_pa -= startoff;
 				map->m_bdev = dif->bdev;
 				map->m_daxdev = dif->dax_dev;
+				map->m_dax_part_off = dif->dax_part_off;
 				break;
 			}
 		}
@@ -248,9 +251,13 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	if (ret)
 		return ret;
 
-	iomap->bdev = mdev.m_bdev;
-	iomap->dax_dev = mdev.m_daxdev;
 	iomap->offset = map.m_la;
+	if (flags & IOMAP_DAX) {
+		iomap->dax_dev = mdev.m_daxdev;
+		iomap->offset += mdev.m_dax_part_off;
+	} else {
+		iomap->bdev = mdev.m_bdev;
+	}
 	iomap->length = map.m_llen;
 	iomap->flags = 0;
 	iomap->private = NULL;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 3db494a398b2..b8272fb95fd6 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -51,6 +51,7 @@ struct erofs_device_info {
 	char *path;
 	struct block_device *bdev;
 	struct dax_device *dax_dev;
+	u64 dax_part_off;
 
 	u32 blocks;
 	u32 mapped_blkaddr;
@@ -115,6 +116,7 @@ struct erofs_sb_info {
 #endif	/* CONFIG_EROFS_FS_ZIP */
 	struct erofs_dev_context *devs;
 	struct dax_device *dax_dev;
+	u64 dax_part_off;
 	u64 total_blocks;
 	u32 primarydevice_blocks;
 
@@ -467,6 +469,7 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
 struct erofs_map_dev {
 	struct block_device *m_bdev;
 	struct dax_device *m_daxdev;
+	u64 m_dax_part_off;
 
 	erofs_off_t m_pa;
 	unsigned int m_deviceid;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 5c137647fa8a..915eefe0d7e2 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -267,7 +267,7 @@ static int erofs_init_devices(struct super_block *sb,
 			break;
 		}
 		dif->bdev = bdev;
-		dif->dax_dev = fs_dax_get_by_bdev(bdev);
+		dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off);
 		dif->blocks = le32_to_cpu(dis->blocks);
 		dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
 		sbi->total_blocks += dif->blocks;
@@ -597,7 +597,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 
 	sb->s_fs_info = sbi;
 	sbi->opt = ctx->opt;
-	sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
+	sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->dax_part_off);
 	sbi->devs = ctx->devs;
 	ctx->devs = NULL;
 
@@ -605,10 +605,13 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 	if (err)
 		return err;
 
-	if (test_opt(&sbi->opt, DAX_ALWAYS) &&
-	    !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
-		errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
-		clear_opt(&sbi->opt, DAX_ALWAYS);
+	if (test_opt(&sbi->opt, DAX_ALWAYS)) {
+		BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);
+
+		if (!sbi->dax_dev) {
+			errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+			clear_opt(&sbi->opt, DAX_ALWAYS);
+		}
 	}
 	sb->s_flags |= SB_RDONLY | SB_NOATIME;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 3be9dd6412b7..d4f306aa5ace 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -118,6 +118,7 @@ struct ext2_sb_info {
 	spinlock_t s_lock;
 	struct mb_cache *s_ea_block_cache;
 	struct dax_device *s_daxdev;
+	u64 s_dax_part_off;
 };
 
 static inline spinlock_t *
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 333fa62661d5..602578b72d8c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -36,6 +36,7 @@
 #include <linux/iomap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
+#include <linux/dax.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xattr.h"
@@ -816,9 +817,11 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		return ret;
 
 	iomap->flags = 0;
-	iomap->bdev = inode->i_sb->s_bdev;
 	iomap->offset = (u64)first_block << blkbits;
-	iomap->dax_dev = sbi->s_daxdev;
+	if (flags & IOMAP_DAX)
+		iomap->dax_dev = sbi->s_daxdev;
+	else
+		iomap->bdev = inode->i_sb->s_bdev;
 
 	if (ret == 0) {
 		iomap->type = IOMAP_HOLE;
@@ -827,6 +830,8 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	} else {
 		iomap->type = IOMAP_MAPPED;
 		iomap->addr = (u64)bno << blkbits;
+		if (flags & IOMAP_DAX)
+			iomap->addr += sbi->s_dax_part_off;
 		iomap->length = (u64)ret << blkbits;
 		iomap->flags |= IOMAP_F_MERGED;
 	}
@@ -1297,9 +1302,9 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
 	inode_dio_wait(inode);
 
 	if (IS_DAX(inode)) {
-		error = iomap_zero_range(inode, newsize,
-					 PAGE_ALIGN(newsize) - newsize, NULL,
-					 &ext2_iomap_ops);
+		error = dax_zero_range(inode, newsize,
+				       PAGE_ALIGN(newsize) - newsize, NULL,
+				       &ext2_iomap_ops);
 	} else if (test_opt(inode->i_sb, NOBH))
 		error = nobh_truncate_page(inode->i_mapping,
 					   newsize, ext2_get_block);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d8d580b609ba..94f1fbd7d3ac 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -802,7 +802,6 @@ static unsigned long descriptor_loc(struct super_block *sb,
 
 static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 {
-	struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
 	struct buffer_head * bh;
 	struct ext2_sb_info * sbi;
 	struct ext2_super_block * es;
@@ -822,17 +821,17 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
-		goto failed;
+		return -ENOMEM;
 
 	sbi->s_blockgroup_lock =
 		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
 	if (!sbi->s_blockgroup_lock) {
 		kfree(sbi);
-		goto failed;
+		return -ENOMEM;
 	}
 	sb->s_fs_info = sbi;
 	sbi->s_sb_block = sb_block;
-	sbi->s_daxdev = dax_dev;
+	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
 
 	spin_lock_init(&sbi->s_lock);
 	ret = -EINVAL;
@@ -946,11 +945,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
 	if (test_opt(sb, DAX)) {
-		if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
-				bdev_nr_sectors(sb->s_bdev))) {
+		if (!sbi->s_daxdev) {
 			ext2_msg(sb, KERN_ERR,
 				"DAX unsupported by block device. Turning off DAX.");
 			clear_opt(sbi->s_mount_opt, DAX);
+		} else if (blocksize != PAGE_SIZE) {
+			ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
+			clear_opt(sbi->s_mount_opt, DAX);
 		}
 	}
 
@@ -1199,11 +1200,10 @@ failed_mount_group_desc:
 failed_mount:
 	brelse(bh);
 failed_sbi:
+	fs_put_dax(sbi->s_daxdev);
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-failed:
-	fs_put_dax(dax_dev);
 	return ret;
 }
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 715ee206dfe1..71a3cdceaa03 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1699,6 +1699,7 @@ struct ext4_sb_info {
	 */
 	struct percpu_rw_semaphore s_writepages_rwsem;
 	struct dax_device *s_daxdev;
+	u64 s_dax_part_off;
 #ifdef CONFIG_EXT4_DEBUG
 	unsigned long s_simulate_fail;
 #endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9dbeb772de60..5f79d265d06a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/iomap.h>
 #include <linux/iversion.h>
+#include <linux/dax.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -3253,7 +3254,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
 
 static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
 			   struct ext4_map_blocks *map, loff_t offset,
-			   loff_t length)
+			   loff_t length, unsigned int flags)
 {
 	u8 blkbits = inode->i_blkbits;
 
@@ -3270,8 +3271,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
 	if (map->m_flags & EXT4_MAP_NEW)
 		iomap->flags |= IOMAP_F_NEW;
 
-	iomap->bdev = inode->i_sb->s_bdev;
-	iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
+	if (flags & IOMAP_DAX)
+		iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
+	else
+		iomap->bdev = inode->i_sb->s_bdev;
 	iomap->offset = (u64) map->m_lblk << blkbits;
 	iomap->length = (u64) map->m_len << blkbits;
 
@@ -3291,9 +3294,13 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
 	if (map->m_flags & EXT4_MAP_UNWRITTEN) {
 		iomap->type = IOMAP_UNWRITTEN;
 		iomap->addr = (u64) map->m_pblk << blkbits;
+		if (flags & IOMAP_DAX)
+			iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
 	} else if (map->m_flags & EXT4_MAP_MAPPED) {
 		iomap->type = IOMAP_MAPPED;
 		iomap->addr = (u64) map->m_pblk << blkbits;
+		if (flags & IOMAP_DAX)
+			iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
 	} else {
 		iomap->type = IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;
@@ -3330,8 +3337,8 @@ retry:
 	 * DAX and direct I/O are the only two operations that are currently
 	 * supported with IOMAP_WRITE.
	 */
-	WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT));
-	if (IS_DAX(inode))
+	WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT)));
+	if (flags & IOMAP_DAX)
 		m_flags = EXT4_GET_BLOCKS_CREATE_ZERO;
 	/*
 	 * We use i_size instead of i_disksize here because delalloc writeback
@@ -3402,7 +3409,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	if (ret < 0)
 		return ret;
 out:
-	ext4_set_iomap(inode, iomap, &map, offset, length);
+	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
 
 	return 0;
 }
@@ -3522,7 +3529,7 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
 	delalloc = ext4_iomap_is_delalloc(inode, &map);
 
 set_iomap:
-	ext4_set_iomap(inode, iomap, &map, offset, length);
+	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
 	if (delalloc && iomap->type == IOMAP_HOLE)
 		iomap->type = IOMAP_DELALLOC;
 
@@ -3762,8 +3769,8 @@ static int ext4_block_zero_page_range(handle_t *handle,
 		length = max;
 
 	if (IS_DAX(inode)) {
-		return iomap_zero_range(inode, from, length, NULL,
-					&ext4_iomap_ops);
+		return dax_zero_range(inode, from, length, NULL,
+				      &ext4_iomap_ops);
 	}
 	return __ext4_block_zero_page_range(handle, mapping, from, length);
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9a936ecbaa3b..0343f682504d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4338,7 +4338,7 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
 	if (!sbi)
 		return NULL;
 
-	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev);
+	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off);
 
 	sbi->s_blockgroup_lock =
 		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
@@ -4756,9 +4756,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 		goto failed_mount;
 	}
 
-	if (dax_supported(sbi->s_daxdev, sb->s_bdev, blocksize, 0,
-			bdev_nr_sectors(sb->s_bdev)))
-		set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+	if (sbi->s_daxdev) {
+		if (blocksize == PAGE_SIZE)
+			set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+		else
+			ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
+	}
 
 	if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
 		if (ext4_has_feature_inline_data(sb)) {
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 40ce9a1c12e5..038ed0b9aaa5 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -45,7 +45,7 @@ config FUSE_DAX
 	select INTERVAL_TREE
 	depends on VIRTIO_FS
 	depends on FS_DAX
-	depends on DAX_DRIVER
+	depends on DAX
 	help
 	  This allows bypassing guest page cache and allows mapping host page
 	  cache directly in guest address space.
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index e54dc069587d..74e627109b79 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -765,20 +765,6 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
 	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
 }
 
-static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
-				       pgoff_t pgoff, void *addr,
-				       size_t bytes, struct iov_iter *i)
-{
-	return copy_from_iter(addr, bytes, i);
-}
-
-static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
-				     pgoff_t pgoff, void *addr,
-				     size_t bytes, struct iov_iter *i)
-{
-	return copy_to_iter(addr, bytes, i);
-}
-
 static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
 				     pgoff_t pgoff, size_t nr_pages)
 {
@@ -795,8 +781,6 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
 
 static const struct dax_operations virtio_fs_dax_ops = {
 	.direct_access = virtio_fs_direct_access,
-	.copy_from_iter = virtio_fs_copy_from_iter,
-	.copy_to_iter = virtio_fs_copy_to_iter,
 	.zero_page_range = virtio_fs_zero_page_range,
 };
 
@@ -862,7 +846,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
 		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
 
-	fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
+	fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
 	if (IS_ERR(fs->dax_dev))
 		return PTR_ERR(fs->dax_dev);
diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile
index 4143a3ff89db..fc070184b7fa 100644
--- a/fs/iomap/Makefile
+++ b/fs/iomap/Makefile
@@ -9,9 +9,9 @@ ccflags-y += -I $(srctree)/$(src)		# needed for trace events
 obj-$(CONFIG_FS_IOMAP)		+= iomap.o
 
 iomap-y				+= trace.o \
-				   buffered-io.o \
+				   iter.o
+iomap-$(CONFIG_BLOCK)		+= buffered-io.o \
 				   direct-io.o \
 				   fiemap.o \
-				   iter.o \
 				   seek.o
 iomap-$(CONFIG_SWAP)		+= swapfile.o
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index c6b3a148e898..c938bbad075e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -897,7 +897,6 @@ EXPORT_SYMBOL_GPL(iomap_file_unshare);
 
 static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 {
-	struct iomap *iomap = &iter->iomap;
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	loff_t pos = iter->pos;
 	loff_t length = iomap_length(iter);
@@ -913,14 +912,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 		size_t offset;
 		size_t bytes = min_t(u64, SIZE_MAX, length);
 
-		if (IS_DAX(iter->inode)) {
-			s64 tmp = dax_iomap_zero(pos, bytes, iomap);
-			if (tmp < 0)
-				return tmp;
-			bytes = tmp;
-			goto good;
-		}
-
 		status = iomap_write_begin(iter, pos, bytes, &folio);
 		if (status)
 			return status;
@@ -933,7 +924,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 		folio_mark_accessed(folio);
 
 		bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
-good:
 		if (WARN_ON_ONCE(bytes == 0))
 			return -EIO;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 4dccd4d90622..74198dd82b03 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4551,7 +4551,7 @@ xfs_bmapi_convert_delalloc(
 	 * the extent.  Just return the real extent at this offset.
	 */
 	if (!isnullstartblock(bma.got.br_startblock)) {
-		xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
+		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
 		*seq = READ_ONCE(ifp->if_seq);
 		goto out_trans_cancel;
 	}
@@ -4598,7 +4598,7 @@ xfs_bmapi_convert_delalloc(
 	XFS_STATS_INC(mp, xs_xstrat_quick);
 
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
-	xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
+	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
 	*seq = READ_ONCE(ifp->if_seq);
 
 	if (whichfork == XFS_COW_FORK)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4098a9875c5b..2705f91bdd0d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -359,7 +359,7 @@ retry:
 	    isnullstartblock(imap.br_startblock))
 		goto allocate_blocks;
 
-	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
+	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
 	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
 	return 0;
 allocate_blocks:
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 73a36b7be3bd..797ea0c8b14e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1001,7 +1001,7 @@ xfs_free_file_space(
 
 	/*
 	 * Now that we've unmap all full blocks we'll have to zero out any
-	 * partial block at the beginning and/or end.  iomap_zero_range is smart
+	 * partial block at the beginning and/or end.  xfs_zero_range is smart
 	 * enough to skip any holes, including those we just created, but we
 	 * must take care not to zero beyond EOF and enlarge i_size.
	 */
@@ -1009,15 +1009,14 @@ xfs_free_file_space(
 		return 0;
 	if (offset + len > XFS_ISIZE(ip))
 		len = XFS_ISIZE(ip) - offset;
-	error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
-				 &xfs_buffered_write_iomap_ops);
+	error = xfs_zero_range(ip, offset, len, NULL);
 	if (error)
 		return error;
 
 	/*
 	 * If we zeroed right up to EOF and EOF straddles a page boundary we
 	 * must make sure that the post-EOF area is also zeroed because the
-	 * page could be mmap'd and iomap_zero_range doesn't do that for us.
+	 * page could be mmap'd and xfs_zero_range doesn't do that for us.
 	 * Writeback of the eof page will do this, albeit clumsily.
	 */
 	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 631c5a61d89b..bbb0fbd34e64 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1892,6 +1892,7 @@ xfs_free_buftarg(
 	list_lru_destroy(&btp->bt_lru);
 
 	blkdev_issue_flush(btp->bt_bdev);
+	fs_put_dax(btp->bt_daxdev);
 
 	kmem_free(btp);
 }
@@ -1932,11 +1933,10 @@ xfs_setsize_buftarg_early(
 	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
 }
 
-xfs_buftarg_t *
+struct xfs_buftarg *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
-	struct block_device	*bdev,
-	struct dax_device	*dax_dev)
+	struct block_device	*bdev)
 {
 	xfs_buftarg_t		*btp;
 
@@ -1945,7 +1945,7 @@ xfs_alloc_buftarg(
 	btp->bt_mount = mp;
 	btp->bt_dev = bdev->bd_dev;
 	btp->bt_bdev = bdev;
-	btp->bt_daxdev = dax_dev;
+	btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off);
 
 	/*
 	 * Buffer IO error rate limiting. Limit it to no more than 10 messages
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 6b0200b8007d..edcb6254fa6a 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -89,6 +89,7 @@ typedef struct xfs_buftarg {
 	dev_t			bt_dev;
 	struct block_device	*bt_bdev;
 	struct dax_device	*bt_daxdev;
+	u64			bt_dax_part_off;
 	struct xfs_mount	*bt_mount;
 	unsigned int		bt_meta_sectorsize;
 	size_t			bt_meta_sectormask;
@@ -338,8 +339,8 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
 /*
  * Handling of buftargs.
 */
-extern struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *,
-			struct block_device *, struct dax_device *);
+struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
+		struct block_device *bdev);
 extern void xfs_free_buftarg(struct xfs_buftarg *);
 extern void xfs_buftarg_wait(struct xfs_buftarg *);
 extern void xfs_buftarg_drain(struct xfs_buftarg *);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 27594738b0d1..8d4c5ca261bd 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -437,8 +437,7 @@ restart:
 		}
 
 		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
-		error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
-				NULL, &xfs_buffered_write_iomap_ops);
+		error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
 		if (error)
 			return error;
 	} else
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 093758440ad5..e552ce541ec2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -28,7 +28,6 @@
 #include "xfs_dquot.h"
 #include "xfs_reflink.h"
 
-
 #define XFS_ALLOC_ALIGN(mp, off) \
 	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
 
@@ -54,7 +53,8 @@ xfs_bmbt_to_iomap(
 	struct xfs_inode	*ip,
 	struct iomap		*iomap,
 	struct xfs_bmbt_irec	*imap,
-	u16			flags)
+	unsigned int		mapping_flags,
+	u16			iomap_flags)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
@@ -71,16 +71,22 @@ xfs_bmbt_to_iomap(
 		iomap->type = IOMAP_DELALLOC;
 	} else {
 		iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
+		if (mapping_flags & IOMAP_DAX)
+			iomap->addr += target->bt_dax_part_off;
+
 		if (imap->br_state == XFS_EXT_UNWRITTEN)
 			iomap->type = IOMAP_UNWRITTEN;
 		else
 			iomap->type = IOMAP_MAPPED;
+
 	}
 	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
 	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
-	iomap->bdev = target->bt_bdev;
-	iomap->dax_dev = target->bt_daxdev;
-	iomap->flags = flags;
+	if (mapping_flags & IOMAP_DAX)
+		iomap->dax_dev = target->bt_daxdev;
+	else
+		iomap->bdev = target->bt_bdev;
+	iomap->flags = iomap_flags;
 
 	if (xfs_ipincount(ip) &&
 	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
@@ -188,6 +194,7 @@ xfs_iomap_write_direct(
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		offset_fsb,
 	xfs_fileoff_t		count_fsb,
+	unsigned int		flags,
 	struct xfs_bmbt_irec	*imap)
 {
 	struct xfs_mount	*mp = ip->i_mount;
@@ -229,7 +236,7 @@ xfs_iomap_write_direct(
 	 * the reserve block pool for bmbt block allocation if there is no space
 	 * left but we need to do unwritten extent conversion.
	 */
-	if (IS_DAX(VFS_I(ip))) {
+	if (flags & IOMAP_DAX) {
 		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
 		if (imap->br_state == XFS_EXT_UNWRITTEN) {
 			force = true;
@@ -620,7 +627,7 @@ imap_needs_alloc(
 	    imap->br_startblock == DELAYSTARTBLOCK)
 		return true;
 	/* we convert unwritten extents before copying the data for DAX */
-	if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
+	if ((flags & IOMAP_DAX) && imap->br_state == XFS_EXT_UNWRITTEN)
 		return true;
 	return false;
 }
@@ -800,7 +807,7 @@ xfs_direct_write_iomap_begin(
 
 	xfs_iunlock(ip, lockmode);
 	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
 
 allocate_blocks:
 	error = -EAGAIN;
@@ -826,23 +833,24 @@ allocate_blocks:
 	xfs_iunlock(ip, lockmode);
 
 	error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
-			&imap);
+			flags, &imap);
 	if (error)
 		return error;
 
 	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
+				 iomap_flags | IOMAP_F_NEW);
 
 out_found_cow:
 	xfs_iunlock(ip, lockmode);
 	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
 	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
 	if (imap.br_startblock != HOLESTARTBLOCK) {
-		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
+		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
 		if (error)
 			return error;
 	}
-	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
 
 out_unlock:
 	if (lockmode)
@@ -1052,23 +1060,24 @@ retry:
	 */
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
 
 found_imap:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
 
 found_cow:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	if (imap.br_startoff <= offset_fsb) {
-		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
+		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
 		if (error)
 			return error;
-		return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+		return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
+					 IOMAP_F_SHARED);
 	}
 
 	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
-	return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
 
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1177,7 +1186,8 @@ xfs_read_iomap_begin(
 	if (error)
 		return error;
 	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
+				 shared ? IOMAP_F_SHARED : 0);
 }
 
 const struct iomap_ops xfs_read_iomap_ops = {
@@ -1236,7 +1246,8 @@ xfs_seek_iomap_begin(
 		if (data_fsb < cow_fsb + cmap.br_blockcount)
 			end_fsb = min(end_fsb, data_fsb);
 		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
-		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
+					  IOMAP_F_SHARED);
 		/*
 		 * This is a COW extent, so we must probe the page cache
 		 * because there could be dirty page cache being backed
@@ -1258,7 +1269,7 @@ xfs_seek_iomap_begin(
 	imap.br_state = XFS_EXT_NORM;
 done:
 	xfs_trim_extent(&imap, offset_fsb, end_fsb);
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+	error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
 out_unlock:
 	xfs_iunlock(ip, lockmode);
 	return error;
@@ -1305,9 +1316,40 @@ out_unlock:
 	if (error)
 		return error;
 	ASSERT(nimaps);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
 }
 
 const struct iomap_ops xfs_xattr_iomap_ops = {
 	.iomap_begin		= xfs_xattr_iomap_begin,
 };
+
+int
+xfs_zero_range(
+	struct xfs_inode	*ip,
+	loff_t			pos,
+	loff_t			len,
+	bool			*did_zero)
+{
+	struct inode		*inode = VFS_I(ip);
+
+	if (IS_DAX(inode))
+		return dax_zero_range(inode, pos, len, did_zero,
+				      &xfs_direct_write_iomap_ops);
+	return iomap_zero_range(inode, pos, len, did_zero,
+				&xfs_buffered_write_iomap_ops);
+}
+
+int
+xfs_truncate_page(
+	struct xfs_inode	*ip,
+	loff_t			pos,
+	bool			*did_zero)
+{
+	struct inode		*inode = VFS_I(ip);
+
+	if (IS_DAX(inode))
+		return dax_truncate_page(inode, pos, did_zero,
+					 &xfs_direct_write_iomap_ops);
+	return iomap_truncate_page(inode, pos, did_zero,
+				   &xfs_buffered_write_iomap_ops);
+}
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 7d3703556d0e..e88dc162c785 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -12,13 +12,19 @@ struct xfs_inode;
 struct xfs_bmbt_irec;
 
 int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
-		xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap);
+		xfs_fileoff_t count_fsb, unsigned int flags,
+		struct xfs_bmbt_irec *imap);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
 		xfs_fileoff_t end_fsb);
 
-int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
-		struct xfs_bmbt_irec *, u16);
+int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
+		struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
+		u16 iomap_flags);
+
+int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
+		bool *did_zero);
+int xfs_truncate_page(struct xfs_inode *ip, loff_t pos, bool *did_zero);
 
 static inline xfs_filblks_t
 xfs_aligned_fsb_count(
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 3447c19e99da..b79b3846e71b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -890,8 +890,8 @@ xfs_setattr_size(
	 */
 	if (newsize > oldsize) {
 		trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
-		error = iomap_zero_range(inode, oldsize, newsize - oldsize,
-				&did_zeroing, &xfs_buffered_write_iomap_ops);
+		error = xfs_zero_range(ip, oldsize, newsize - oldsize,
+				&did_zeroing);
 	} else {
 		/*
 		 * iomap won't detect a dirty page over an unwritten block (or a
@@ -903,8 +903,7 @@ xfs_setattr_size(
 				newsize);
 		if (error)
 			return error;
-		error = iomap_truncate_page(inode, newsize, &did_zeroing,
-				&xfs_buffered_write_iomap_ops);
+		error = xfs_truncate_page(ip, newsize, &did_zeroing);
 	}
 
 	if (error)
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 5e1d29d8b2e7..d6334abbc0b3 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -155,7 +155,7 @@ xfs_fs_map_blocks(
 		xfs_iunlock(ip, lock_flags);
 
 		error = xfs_iomap_write_direct(ip, offset_fsb,
-				end_fsb - offset_fsb, &imap);
+				end_fsb - offset_fsb, 0, &imap);
 		if (error)
 			goto out_unlock;
 
@@ -173,7 +173,7 @@ xfs_fs_map_blocks(
 	}
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
 	*device_generation = mp->m_generation;
 	return error;
 out_unlock:
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 8b6c7163f684..db70060e7bf6 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1272,8 +1272,7 @@ xfs_reflink_zero_posteof(
 		return 0;
 
 	trace_xfs_zero_eof(ip, isize, pos - isize);
-	return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
-			&xfs_buffered_write_iomap_ops);
+	return xfs_zero_range(ip, isize, pos - isize, NULL);
 }
 
 /*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c7ac486ca5d3..e8f37bdc8354 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -331,13 +331,34 @@ xfs_set_inode_alloc(
 	return xfs_is_inode32(mp) ? maxagi : agcount;
 }
 
-static bool
-xfs_buftarg_is_dax(
-	struct super_block	*sb,
-	struct xfs_buftarg	*bt)
+static int
+xfs_setup_dax_always(
+	struct xfs_mount	*mp)
 {
-	return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
-			bdev_nr_sectors(bt->bt_bdev));
+	if (!mp->m_ddev_targp->bt_daxdev &&
+	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
+		xfs_alert(mp,
+			"DAX unsupported by block device. Turning off DAX.");
+		goto disable_dax;
+	}
+
+	if (mp->m_super->s_blocksize != PAGE_SIZE) {
+		xfs_alert(mp,
+			"DAX not supported for blocksize. Turning off DAX.");
+		goto disable_dax;
+	}
+
+	if (xfs_has_reflink(mp)) {
+		xfs_alert(mp, "DAX and reflink cannot be used together!");
+		return -EINVAL;
+	}
+
+	xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+	return 0;
+
+disable_dax:
+	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
+	return 0;
 }
 
 STATIC int
@@ -370,26 +391,19 @@ STATIC void
 xfs_close_devices(
 	struct xfs_mount	*mp)
 {
-	struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
-
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-		struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
 
 		xfs_free_buftarg(mp->m_logdev_targp);
 		xfs_blkdev_put(logdev);
-		fs_put_dax(dax_logdev);
 	}
 	if (mp->m_rtdev_targp) {
 		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-		struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
 
 		xfs_free_buftarg(mp->m_rtdev_targp);
 		xfs_blkdev_put(rtdev);
-		fs_put_dax(dax_rtdev);
 	}
 	xfs_free_buftarg(mp->m_ddev_targp);
-	fs_put_dax(dax_ddev);
 }
 
 /*
@@ -407,8 +421,6 @@ xfs_open_devices(
 	struct xfs_mount	*mp)
 {
 	struct block_device	*ddev = mp->m_super->s_bdev;
-	struct dax_device	*dax_ddev = fs_dax_get_by_bdev(ddev);
-	struct dax_device	*dax_logdev = NULL, *dax_rtdev = NULL;
 	struct block_device	*logdev = NULL, *rtdev = NULL;
 	int			error;
 
@@ -418,8 +430,7 @@ xfs_open_devices(
 	if (mp->m_logname) {
 		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
 		if (error)
-			goto out;
-		dax_logdev = fs_dax_get_by_bdev(logdev);
+			return error;
 	}
 
 	if (mp->m_rtname) {
@@ -433,25 +444,24 @@ xfs_open_devices(
 			error = -EINVAL;
 			goto out_close_rtdev;
 		}
-		dax_rtdev = fs_dax_get_by_bdev(rtdev);
 	}
 
 	/*
 	 * Setup xfs_mount buffer target pointers
	 */
 	error = -ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
+	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
 	if (!mp->m_ddev_targp)
 		goto out_close_rtdev;
 
 	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
+		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
 		if (!mp->m_rtdev_targp)
 			goto out_free_ddev_targ;
 	}
 
 	if (logdev && logdev != ddev) {
-		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
+		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
 		if (!mp->m_logdev_targp)
 			goto out_free_rtdev_targ;
 	} else {
@@ -467,14 +477,9 @@ xfs_open_devices(
 	xfs_free_buftarg(mp->m_ddev_targp);
  out_close_rtdev:
 	xfs_blkdev_put(rtdev);
-	fs_put_dax(dax_rtdev);
 out_close_logdev:
-	if (logdev && logdev != ddev) {
+	if (logdev && logdev != ddev)
 		xfs_blkdev_put(logdev);
-		fs_put_dax(dax_logdev);
-	}
- out:
-	fs_put_dax(dax_ddev);
 	return error;
 }
 
@@ -1593,26 +1598,9 @@ xfs_fs_fill_super(
 		sb->s_flags |= SB_I_VERSION;
 
 	if (xfs_has_dax_always(mp)) {
-		bool rtdev_is_dax = false, datadev_is_dax;
-
-		xfs_warn(mp,
-		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
-
-		datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
-		if (mp->m_rtdev_targp)
-			rtdev_is_dax = xfs_buftarg_is_dax(sb,
-						mp->m_rtdev_targp);
-		if (!rtdev_is_dax && !datadev_is_dax) {
-			xfs_alert(mp,
-			"DAX unsupported by block device. Turning off DAX.");
-			xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
-		}
-		if (xfs_has_reflink(mp)) {
-			xfs_alert(mp,
-			"DAX and reflink cannot be used together!");
-			error = -EINVAL;
+		error = xfs_setup_dax_always(mp);
+		if (error)
 			goto out_filestream_unmount;
-		}
 	}
 
 	if (xfs_has_discard(mp)) {