diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-08-30 10:24:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-08-30 10:24:50 -0700 |
commit | aa99f3c2b9c797d8fee28c674a2cbb5adb2ce2ef (patch) | |
tree | 98ccf3a82c39f7097111a08cfc7531a41be3ef06 /fs/xfs/xfs_inode.c | |
parent | a1ca8e7147d07cb8649c618bc9902a9a7e6444e1 (diff) | |
parent | 7882c55ef64a8179160f24d86e82e525ffcce020 (diff) |
Merge tag 'hole_punch_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fs hole punching vs cache filling race fixes from Jan Kara:
"Fix races leading to possible data corruption or stale data exposure
in multiple filesystems when hole punching races with operations such
as readahead.
This is the series I was sending for the last merge window but with
your objection fixed - now filemap_fault() has been modified to take
invalidate_lock only when we need to create new page in the page cache
and / or bring it uptodate"
* tag 'hole_punch_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
filesystems/locking: fix Malformed table warning
cifs: Fix race between hole punch and page fault
ceph: Fix race between hole punch and page fault
fuse: Convert to using invalidate_lock
f2fs: Convert to using invalidate_lock
zonefs: Convert to using invalidate_lock
xfs: Convert double locking of MMAPLOCK to use VFS helpers
xfs: Convert to use invalidate_lock
xfs: Refactor xfs_isilocked()
ext2: Convert to using invalidate_lock
ext4: Convert to use mapping->invalidate_lock
mm: Add functions to lock invalidate_lock for two mappings
mm: Protect operations adding pages to page cache with invalidate_lock
documentation: Sync file_operations members with reality
mm: Fix comments mentioning i_mutex
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 121 |
1 files changed, 63 insertions, 58 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 990b72ae3635..f00145e1a976 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -132,7 +132,7 @@ xfs_ilock_attr_map_shared( /* * In addition to i_rwsem in the VFS inode, the xfs inode contains 2 - * multi-reader locks: i_mmap_lock and the i_lock. This routine allows + * multi-reader locks: invalidate_lock and the i_lock. This routine allows * various combinations of the locks to be obtained. * * The 3 locks should always be ordered so that the IO lock is obtained first, @@ -140,23 +140,23 @@ xfs_ilock_attr_map_shared( * * Basic locking order: * - * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock + * i_rwsem -> invalidate_lock -> page_lock -> i_ilock * * mmap_lock locking order: * * i_rwsem -> page lock -> mmap_lock - * mmap_lock -> i_mmap_lock -> page_lock + * mmap_lock -> invalidate_lock -> page_lock * * The difference in mmap_lock locking order mean that we cannot hold the - * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can - * fault in pages during copy in/out (for buffered IO) or require the mmap_lock - * in get_user_pages() to map the user pages into the kernel address space for - * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because - * page faults already hold the mmap_lock. + * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths + * can fault in pages during copy in/out (for buffered IO) or require the + * mmap_lock in get_user_pages() to map the user pages into the kernel address + * space for direct IO. Similarly the i_rwsem cannot be taken inside a page + * fault because page faults already hold the mmap_lock. * * Hence to serialise fully against both syscall and mmap based IO, we need to - * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both - * taken in places where we need to invalidate the page cache in a race + * take both the i_rwsem and the invalidate_lock. These locks should *only* be + * both taken in places where we need to invalidate the page cache in a race * free manner (e.g. truncate, hole punch and other extent manipulation * functions). */ @@ -188,10 +188,13 @@ xfs_ilock( XFS_IOLOCK_DEP(lock_flags)); } - if (lock_flags & XFS_MMAPLOCK_EXCL) - mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); - else if (lock_flags & XFS_MMAPLOCK_SHARED) - mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); + if (lock_flags & XFS_MMAPLOCK_EXCL) { + down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock, + XFS_MMAPLOCK_DEP(lock_flags)); + } else if (lock_flags & XFS_MMAPLOCK_SHARED) { + down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock, + XFS_MMAPLOCK_DEP(lock_flags)); + } if (lock_flags & XFS_ILOCK_EXCL) mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); @@ -240,10 +243,10 @@ xfs_ilock_nowait( } if (lock_flags & XFS_MMAPLOCK_EXCL) { - if (!mrtryupdate(&ip->i_mmaplock)) + if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock)) goto out_undo_iolock; } else if (lock_flags & XFS_MMAPLOCK_SHARED) { - if (!mrtryaccess(&ip->i_mmaplock)) + if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock)) goto out_undo_iolock; } @@ -258,9 +261,9 @@ xfs_ilock_nowait( out_undo_mmaplock: if (lock_flags & XFS_MMAPLOCK_EXCL) - mrunlock_excl(&ip->i_mmaplock); + up_write(&VFS_I(ip)->i_mapping->invalidate_lock); else if (lock_flags & XFS_MMAPLOCK_SHARED) - mrunlock_shared(&ip->i_mmaplock); + up_read(&VFS_I(ip)->i_mapping->invalidate_lock); out_undo_iolock: if (lock_flags & XFS_IOLOCK_EXCL) up_write(&VFS_I(ip)->i_rwsem); @@ -307,9 +310,9 @@ xfs_iunlock( up_read(&VFS_I(ip)->i_rwsem); if (lock_flags & XFS_MMAPLOCK_EXCL) - mrunlock_excl(&ip->i_mmaplock); + up_write(&VFS_I(ip)->i_mapping->invalidate_lock); else if (lock_flags & XFS_MMAPLOCK_SHARED) - mrunlock_shared(&ip->i_mmaplock); + up_read(&VFS_I(ip)->i_mapping->invalidate_lock); if (lock_flags & XFS_ILOCK_EXCL) mrunlock_excl(&ip->i_lock); @@ -335,7 +338,7 @@ xfs_ilock_demote( if (lock_flags & XFS_ILOCK_EXCL) mrdemote(&ip->i_lock); if (lock_flags & XFS_MMAPLOCK_EXCL) - mrdemote(&ip->i_mmaplock); + downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock); if (lock_flags & XFS_IOLOCK_EXCL) downgrade_write(&VFS_I(ip)->i_rwsem); @@ -343,9 +346,29 @@ xfs_ilock_demote( } #if defined(DEBUG) || defined(XFS_WARN) -int +static inline bool +__xfs_rwsem_islocked( + struct rw_semaphore *rwsem, + bool shared) +{ + if (!debug_locks) + return rwsem_is_locked(rwsem); + + if (!shared) + return lockdep_is_held_type(rwsem, 0); + + /* + * We are checking that the lock is held at least in shared + * mode but don't care that it might be held exclusively + * (i.e. shared | excl). Hence we check if the lock is held + * in any mode rather than an explicit shared mode. + */ + return lockdep_is_held_type(rwsem, -1); +} + +bool xfs_isilocked( - xfs_inode_t *ip, + struct xfs_inode *ip, uint lock_flags) { if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { @@ -355,20 +378,17 @@ xfs_isilocked( } if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { - if (!(lock_flags & XFS_MMAPLOCK_SHARED)) - return !!ip->i_mmaplock.mr_writer; - return rwsem_is_locked(&ip->i_mmaplock.mr_lock); + return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem, + (lock_flags & XFS_IOLOCK_SHARED)); } - if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { - if (!(lock_flags & XFS_IOLOCK_SHARED)) - return !debug_locks || - lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0); - return rwsem_is_locked(&VFS_I(ip)->i_rwsem); + if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) { + return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem, + (lock_flags & XFS_IOLOCK_SHARED)); } ASSERT(0); - return 0; + return false; } #endif @@ -532,12 +552,10 @@ again: } /* - * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - - * the mmaplock or the ilock, but not more than one type at a time. If we lock - * more than one at a time, lockdep will report false positives saying we have - * violated locking orders. The iolock must be double-locked separately since - * we use i_rwsem for that. We now support taking one lock EXCL and the other - * SHARED. + * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and + * mmaplock must be double-locked separately since we use i_rwsem and + * invalidate_lock for that. We now support taking one lock EXCL and the + * other SHARED. */ void xfs_lock_two_inodes( @@ -555,15 +573,8 @@ xfs_lock_two_inodes( ASSERT(hweight32(ip1_mode) == 1); ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); - ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || - !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); - + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { @@ -3741,11 +3752,8 @@ xfs_ilock2_io_mmap( ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2)); if (ret) return ret; - if (ip1 == ip2) - xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); - else - xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL, - ip2, XFS_MMAPLOCK_EXCL); + filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping, + VFS_I(ip2)->i_mapping); return 0; } @@ -3755,12 +3763,9 @@ xfs_iunlock2_io_mmap( struct xfs_inode *ip1, struct xfs_inode *ip2) { - bool same_inode = (ip1 == ip2); - - xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); - if (!same_inode) - xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); + filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping, + VFS_I(ip2)->i_mapping); inode_unlock(VFS_I(ip2)); - if (!same_inode) + if (ip1 != ip2) inode_unlock(VFS_I(ip1)); } |