diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-13 19:15:14 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-13 19:15:14 -0800 |
commit | d080827f850ba4df5b955d5ca8c8c0fc92fe18c0 (patch) | |
tree | 37262315200bbbe50bdd64ce3011951a92855159 /fs/block_dev.c | |
parent | cbd88cd4c07f9361914ab7fd7e21c9227986fe68 (diff) | |
parent | 8b63b6bfc1a551acf154061699028c7032d7890c (diff) |
Merge tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"The bulk of this has appeared in -next and independently received a
build success notification from the kbuild robot. The 'for-4.5/block-
dax' topic branch was rebased over the weekend to drop the "block
device end-of-life" rework that Al would like to see re-implemented
with a notifier, and to address bug reports against the badblocks
integration.
There is pending feedback against "libnvdimm: Add a poison list and
export badblocks" received last week. Linda identified some localized
fixups that we will handle incrementally.
Summary:
- Media error handling: The 'badblocks' implementation that
originated in md-raid is up-levelled to a generic capability of a
block device. This initial implementation is limited to being
consulted in the pmem block-i/o path. Later, 'badblocks' will be
consulted when creating dax mappings.
- Raw block device dax: For virtualization and other cases that want
large contiguous mappings of persistent memory, add the capability
to dax-mmap a block device directly.
- Increased /dev/mem restrictions: Add an option to treat all
io-memory as IORESOURCE_EXCLUSIVE, i.e. disable /dev/mem access
while a driver is actively using an address range. This behavior
is controlled via the new CONFIG_IO_STRICT_DEVMEM option and can be
overridden by the existing "iomem=relaxed" kernel command line
option.
- Miscellaneous fixes include a 'pfn'-device huge page alignment fix,
block device shutdown crash fix, and other small libnvdimm fixes"
* tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (32 commits)
block: kill disk_{check|set|clear|alloc}_badblocks
libnvdimm, pmem: nvdimm_read_bytes() badblocks support
pmem, dax: disable dax in the presence of bad blocks
pmem: fail io-requests to known bad blocks
libnvdimm: convert to statically allocated badblocks
libnvdimm: don't fail init for full badblocks list
block, badblocks: introduce devm_init_badblocks
block: clarify badblocks lifetime
badblocks: rename badblocks_free to badblocks_exit
libnvdimm, pmem: move definition of nvdimm_namespace_add_poison to nd.h
libnvdimm: Add a poison list and export badblocks
nfit_test: Enable DSMs for all test NFITs
md: convert to use the generic badblocks code
block: Add badblock management for gendisks
badblocks: Add core badblock management code
block: fix del_gendisk() vs blkdev_ioctl crash
block: enable dax for raw block devices
block: introduce bdev_file_inode()
restrict /dev/mem to idle io memory ranges
arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug
...
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r-- | fs/block_dev.c | 122 |
1 files changed, 107 insertions, 15 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 01b8e0d4b4ff..d878e4860fb7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock, return 0; } +static struct inode *bdev_file_inode(struct file *file) +{ + return file->f_mapping->host; +} + static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct inode *inode = bdev_file_inode(file); if (IS_DAX(inode)) return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, @@ -338,7 +343,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, */ static loff_t block_llseek(struct file *file, loff_t offset, int whence) { - struct inode *bd_inode = file->f_mapping->host; + struct inode *bd_inode = bdev_file_inode(file); loff_t retval; mutex_lock(&bd_inode->i_mutex); @@ -349,7 +354,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence) int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { - struct inode *bd_inode = filp->f_mapping->host; + struct inode *bd_inode = bdev_file_inode(filp); struct block_device *bdev = I_BDEV(bd_inode); int error; @@ -1224,8 +1229,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } - if (!ret) + if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + if (!blkdev_dax_capable(bdev)) + bdev->bd_inode->i_flags &= ~S_DAX; + } /* * If the device is invalidated, rescan partition @@ -1239,6 +1247,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) else if (ret == -ENOMEDIUM) invalidate_partitions(disk, bdev); } + if (ret) goto out_clear; } else { @@ -1259,12 +1268,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); - /* - * If the partition is not aligned on a page - * boundary, we can't do dax I/O to it. - */ - if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || - (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + if (!blkdev_dax_capable(bdev)) bdev->bd_inode->i_flags &= ~S_DAX; } } else { @@ -1599,14 +1603,14 @@ EXPORT_SYMBOL(blkdev_put); static int blkdev_close(struct inode * inode, struct file * filp) { - struct block_device *bdev = I_BDEV(filp->f_mapping->host); + struct block_device *bdev = I_BDEV(bdev_file_inode(filp)); blkdev_put(bdev, filp->f_mode); return 0; } static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { - struct block_device *bdev = I_BDEV(file->f_mapping->host); + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); fmode_t mode = file->f_mode; /* @@ -1631,7 +1635,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct inode *bd_inode = file->f_mapping->host; + struct inode *bd_inode = bdev_file_inode(file); loff_t size = i_size_read(bd_inode); struct blk_plug plug; ssize_t ret; @@ -1663,7 +1667,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter); ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; - struct inode *bd_inode = file->f_mapping->host; + struct inode *bd_inode = bdev_file_inode(file); loff_t size = i_size_read(bd_inode); loff_t pos = iocb->ki_pos; @@ -1702,13 +1706,101 @@ static const struct address_space_operations def_blk_aops = { .is_dirty_writeback = buffer_check_dirty_writeback, }; +#ifdef CONFIG_FS_DAX +/* + * In the raw block case we do not need to contend with truncation nor + * unwritten file extents. Without those concerns there is no need for + * additional locking beyond the mmap_sem context that these routines + * are already executing under. + * + * Note, there is no protection if the block device is dynamically + * resized (partition grow/shrink) during a fault. A stable block device + * size is already not enforced in the blkdev_direct_IO path. + * + * For DAX, it is the responsibility of the block device driver to + * ensure the whole-disk device size is stable while requests are in + * flight. + * + * Finally, unlike the filemap_page_mkwrite() case there is no + * filesystem superblock to sync against freezing. We still include a + * pfn_mkwrite callback for dax drivers to receive write fault + * notifications. + */ +static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return __dax_fault(vma, vmf, blkdev_get_block, NULL); +} + +static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, unsigned int flags) +{ + return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); +} + +static void blkdev_vm_open(struct vm_area_struct *vma) +{ + struct inode *bd_inode = bdev_file_inode(vma->vm_file); + struct block_device *bdev = I_BDEV(bd_inode); + + mutex_lock(&bd_inode->i_mutex); + bdev->bd_map_count++; + mutex_unlock(&bd_inode->i_mutex); +} + +static void blkdev_vm_close(struct vm_area_struct *vma) +{ + struct inode *bd_inode = bdev_file_inode(vma->vm_file); + struct block_device *bdev = I_BDEV(bd_inode); + + mutex_lock(&bd_inode->i_mutex); + bdev->bd_map_count--; + mutex_unlock(&bd_inode->i_mutex); +} + +static const struct vm_operations_struct blkdev_dax_vm_ops = { + .open = blkdev_vm_open, + .close = blkdev_vm_close, + .fault = blkdev_dax_fault, + .pmd_fault = blkdev_dax_pmd_fault, + .pfn_mkwrite = blkdev_dax_fault, +}; + +static const struct vm_operations_struct blkdev_default_vm_ops = { + .open = blkdev_vm_open, + .close = blkdev_vm_close, + .fault = filemap_fault, + .map_pages = filemap_map_pages, +}; + +static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct inode *bd_inode = bdev_file_inode(file); + struct block_device *bdev = I_BDEV(bd_inode); + + file_accessed(file); + mutex_lock(&bd_inode->i_mutex); + bdev->bd_map_count++; + if (IS_DAX(bd_inode)) { + vma->vm_ops = &blkdev_dax_vm_ops; + vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + } else { + vma->vm_ops = &blkdev_default_vm_ops; + } + mutex_unlock(&bd_inode->i_mutex); + + return 0; +} +#else +#define blkdev_mmap generic_file_mmap +#endif + const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, .read_iter = blkdev_read_iter, .write_iter = blkdev_write_iter, - .mmap = generic_file_mmap, + .mmap = blkdev_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT |