summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c6
-rw-r--r--fs/Kconfig.binfmt18
-rw-r--r--fs/afs/Makefile1
-rw-r--r--fs/afs/callback.c20
-rw-r--r--fs/afs/cmservice.c29
-rw-r--r--fs/afs/dir.c21
-rw-r--r--fs/afs/dir_silly.c5
-rw-r--r--fs/afs/file.c6
-rw-r--r--fs/afs/fsclient.c2
-rw-r--r--fs/afs/inode.c17
-rw-r--r--fs/afs/internal.h33
-rw-r--r--fs/afs/misc.c48
-rw-r--r--fs/afs/netdevices.c48
-rw-r--r--fs/afs/protocol_uae.h132
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/afs/security.c2
-rw-r--r--fs/afs/server.c39
-rw-r--r--fs/afs/server_list.c6
-rw-r--r--fs/afs/write.c3
-rw-r--r--fs/aio.c9
-rw-r--r--fs/binfmt_flat.c99
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/sysfs.c18
-rw-r--r--fs/buffer.c62
-rw-r--r--fs/ceph/debugfs.c24
-rw-r--r--fs/ceph/file.c23
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/char_dev.c3
-rw-r--r--fs/cifs/cifs_spnego.c25
-rw-r--r--fs/cifs/cifsacl.c28
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/configfs/dir.c3
-rw-r--r--fs/crypto/Kconfig1
-rw-r--r--fs/crypto/bio.c73
-rw-r--r--fs/crypto/crypto.c299
-rw-r--r--fs/crypto/fname.c1
-rw-r--r--fs/crypto/fscrypt_private.h15
-rw-r--r--fs/crypto/hooks.c1
-rw-r--r--fs/crypto/keyinfo.c3
-rw-r--r--fs/crypto/policy.c2
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/debugfs/file.c14
-rw-r--r--fs/debugfs/inode.c55
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/dlm/debug_fs.c21
-rw-r--r--fs/dlm/dlm_internal.h8
-rw-r--r--fs/dlm/lockspace.c3
-rw-r--r--fs/dlm/lowcomms.c18
-rw-r--r--fs/dlm/main.c5
-rw-r--r--fs/ecryptfs/crypto.c42
-rw-r--r--fs/ecryptfs/debug.c22
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/ecryptfs/keystore.c11
-rw-r--r--fs/efivarfs/file.c26
-rw-r--r--fs/ext2/balloc.c3
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c7
-rw-r--r--fs/ext2/ioctl.c16
-rw-r--r--fs/ext2/super.c17
-rw-r--r--fs/ext2/xattr.c164
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/dir.c27
-rw-r--r--fs/ext4/ext4.h65
-rw-r--r--fs/ext4/ext4_jbd2.h12
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/extents_status.c1
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/indirect.c22
-rw-r--r--fs/ext4/inline.c21
-rw-r--r--fs/ext4/inode.c130
-rw-r--r--fs/ext4/ioctl.c99
-rw-r--r--fs/ext4/mballoc.c5
-rw-r--r--fs/ext4/move_extent.c15
-rw-r--r--fs/ext4/namei.c213
-rw-r--r--fs/ext4/page-io.c44
-rw-r--r--fs/ext4/sysfs.c6
-rw-r--r--fs/f2fs/checkpoint.c107
-rw-r--r--fs/f2fs/data.c266
-rw-r--r--fs/f2fs/debug.c7
-rw-r--r--fs/f2fs/dir.c16
-rw-r--r--fs/f2fs/extent_cache.c7
-rw-r--r--fs/f2fs/f2fs.h129
-rw-r--r--fs/f2fs/file.c302
-rw-r--r--fs/f2fs/gc.c196
-rw-r--r--fs/f2fs/inline.c16
-rw-r--r--fs/f2fs/inode.c78
-rw-r--r--fs/f2fs/namei.c10
-rw-r--r--fs/f2fs/node.c38
-rw-r--r--fs/f2fs/recovery.c43
-rw-r--r--fs/f2fs/segment.c170
-rw-r--r--fs/f2fs/segment.h16
-rw-r--r--fs/f2fs/super.c610
-rw-r--r--fs/f2fs/sysfs.c28
-rw-r--r--fs/f2fs/xattr.c10
-rw-r--r--fs/fscache/object-list.c2
-rw-r--r--fs/fuse/file.c29
-rw-r--r--fs/gfs2/aops.c110
-rw-r--r--fs/gfs2/aops.h4
-rw-r--r--fs/gfs2/bmap.c16
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/file.c79
-rw-r--r--fs/gfs2/glock.c42
-rw-r--r--fs/gfs2/glock.h11
-rw-r--r--fs/gfs2/glops.c12
-rw-r--r--fs/gfs2/incore.h6
-rw-r--r--fs/gfs2/inode.c2
-rw-r--r--fs/gfs2/log.c3
-rw-r--r--fs/gfs2/lops.c22
-rw-r--r--fs/gfs2/meta_io.c6
-rw-r--r--fs/gfs2/ops_fstype.c27
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/gfs2/recovery.c3
-rw-r--r--fs/gfs2/rgrp.c48
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/super.c43
-rw-r--r--fs/gfs2/super.h2
-rw-r--r--fs/gfs2/sys.c8
-rw-r--r--fs/gfs2/trans.c6
-rw-r--r--fs/gfs2/util.c8
-rw-r--r--fs/hfsplus/ioctl.c21
-rw-r--r--fs/inode.c106
-rw-r--r--fs/internal.h2
-rw-r--r--fs/io_uring.c341
-rw-r--r--fs/iomap.c17
-rw-r--r--fs/jbd2/commit.c25
-rw-r--r--fs/jbd2/journal.c25
-rw-r--r--fs/jbd2/transaction.c49
-rw-r--r--fs/jffs2/file.c4
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jfs/ioctl.c22
-rw-r--r--fs/lockd/clntproc.c21
-rw-r--r--fs/lockd/svc4proc.c14
-rw-r--r--fs/lockd/svclock.c118
-rw-r--r--fs/lockd/svcproc.c14
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/lockd/xdr.c3
-rw-r--r--fs/lockd/xdr4.c3
-rw-r--r--fs/locks.c67
-rw-r--r--fs/namei.c2
-rw-r--r--fs/nfs/nfs4file.c23
-rw-r--r--fs/nfs/nfs4idmap.c30
-rw-r--r--fs/nfs/unlink.c6
-rw-r--r--fs/nfsd/blocklayout.c8
-rw-r--r--fs/nfsd/cache.h5
-rw-r--r--fs/nfsd/fault_inject.c12
-rw-r--r--fs/nfsd/netns.h44
-rw-r--r--fs/nfsd/nfs4idmap.c2
-rw-r--r--fs/nfsd/nfs4state.c453
-rw-r--r--fs/nfsd/nfs4xdr.c38
-rw-r--r--fs/nfsd/nfscache.c236
-rw-r--r--fs/nfsd/nfsctl.c238
-rw-r--r--fs/nfsd/nfsd.h11
-rw-r--r--fs/nfsd/state.h15
-rw-r--r--fs/nfsd/vfs.c2
-rw-r--r--fs/nfsd/xdr4.h5
-rw-r--r--fs/nilfs2/ioctl.c9
-rw-r--r--fs/notify/fanotify/fanotify.c5
-rw-r--r--fs/notify/fanotify/fanotify_user.c22
-rw-r--r--fs/notify/fsnotify.c41
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c8
-rw-r--r--fs/ocfs2/alloc.c10
-rw-r--r--fs/ocfs2/blockcheck.c56
-rw-r--r--fs/ocfs2/blockcheck.h7
-rw-r--r--fs/ocfs2/cluster/heartbeat.c102
-rw-r--r--fs/ocfs2/cluster/heartbeat.h2
-rw-r--r--fs/ocfs2/cluster/netdebug.c39
-rw-r--r--fs/ocfs2/cluster/nodemanager.c4
-rw-r--r--fs/ocfs2/cluster/quorum.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c5
-rw-r--r--fs/ocfs2/cluster/tcp.h5
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c44
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h10
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c10
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c2
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c8
-rw-r--r--fs/ocfs2/dlmglue.c96
-rw-r--r--fs/ocfs2/ioctl.c13
-rw-r--r--fs/ocfs2/localalloc.c6
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/super.c29
-rw-r--r--fs/orangefs/file.c37
-rw-r--r--fs/orangefs/orangefs-debugfs.c54
-rw-r--r--fs/orangefs/orangefs-debugfs.h2
-rw-r--r--fs/orangefs/orangefs-mod.c6
-rw-r--r--fs/proc/base.c31
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/task_mmu.c117
-rw-r--r--fs/proc/task_nommu.c6
-rw-r--r--fs/proc/vmcore.c6
-rw-r--r--fs/pstore/ftrace.c18
-rw-r--r--fs/pstore/inode.c13
-rw-r--r--fs/pstore/ram.c21
-rw-r--r--fs/quota/dquot.c11
-rw-r--r--fs/quota/quota.c38
-rw-r--r--fs/read_write.c124
-rw-r--r--fs/reiserfs/ioctl.c10
-rw-r--r--fs/seq_file.c11
-rw-r--r--fs/splice.c8
-rw-r--r--fs/sysfs/group.c54
-rw-r--r--fs/tracefs/inode.c3
-rw-r--r--fs/ubifs/Kconfig13
-rw-r--r--fs/ubifs/auth.c88
-rw-r--r--fs/ubifs/compress.c27
-rw-r--r--fs/ubifs/crypto.c19
-rw-r--r--fs/ubifs/debug.c169
-rw-r--r--fs/ubifs/debug.h4
-rw-r--r--fs/ubifs/ioctl.c13
-rw-r--r--fs/ubifs/log.c5
-rw-r--r--fs/ubifs/master.c53
-rw-r--r--fs/ubifs/orphan.c94
-rw-r--r--fs/ubifs/recovery.c2
-rw-r--r--fs/ubifs/sb.c52
-rw-r--r--fs/ubifs/super.c55
-rw-r--r--fs/ubifs/tnc.c16
-rw-r--r--fs/ubifs/ubifs-media.h30
-rw-r--r--fs/ubifs/ubifs.h6
-rw-r--r--fs/udf/inode.c93
-rw-r--r--fs/unicode/utf8-core.c28
-rw-r--r--fs/xfs/Makefile9
-rw-r--r--fs/xfs/kmem.c5
-rw-r--r--fs/xfs/kmem.h8
-rw-r--r--fs/xfs/libxfs/xfs_ag.c100
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c8
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c227
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c3
-rw-r--r--fs/xfs/libxfs/xfs_attr.c5
-rw-r--r--fs/xfs/libxfs/xfs_attr.h8
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c15
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c14
-rw-r--r--fs/xfs/libxfs/xfs_bit.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c19
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/libxfs/xfs_btree.c49
-rw-r--r--fs/xfs/libxfs/xfs_btree.h14
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c3
-rw-r--r--fs/xfs/libxfs/xfs_defer.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c6
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c14
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c10
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c5
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c10
-rw-r--r--fs/xfs/libxfs/xfs_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_fs.h124
-rw-r--r--fs/xfs/libxfs/xfs_health.h2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c245
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h18
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c56
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c6
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c9
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c4
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c2
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c2
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c7
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c8
-rw-r--r--fs/xfs/libxfs/xfs_sb.c39
-rw-r--r--fs/xfs/libxfs/xfs_shared.h49
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c10
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c17
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h7
-rw-r--r--fs/xfs/libxfs/xfs_types.c13
-rw-r--r--fs/xfs/scrub/agheader.c11
-rw-r--r--fs/xfs/scrub/agheader_repair.c5
-rw-r--r--fs/xfs/scrub/alloc.c7
-rw-r--r--fs/xfs/scrub/attr.c122
-rw-r--r--fs/xfs/scrub/attr.h71
-rw-r--r--fs/xfs/scrub/bitmap.c5
-rw-r--r--fs/xfs/scrub/bmap.c8
-rw-r--r--fs/xfs/scrub/btree.c7
-rw-r--r--fs/xfs/scrub/common.c8
-rw-r--r--fs/xfs/scrub/dabtree.c8
-rw-r--r--fs/xfs/scrub/dir.c10
-rw-r--r--fs/xfs/scrub/fscounters.c12
-rw-r--r--fs/xfs/scrub/health.c8
-rw-r--r--fs/xfs/scrub/ialloc.c28
-rw-r--r--fs/xfs/scrub/inode.c10
-rw-r--r--fs/xfs/scrub/parent.c8
-rw-r--r--fs/xfs/scrub/quota.c13
-rw-r--r--fs/xfs/scrub/refcount.c10
-rw-r--r--fs/xfs/scrub/repair.c14
-rw-r--r--fs/xfs/scrub/rmap.c9
-rw-r--r--fs/xfs/scrub/rtbitmap.c7
-rw-r--r--fs/xfs/scrub/scrub.c20
-rw-r--r--fs/xfs/scrub/symlink.c8
-rw-r--r--fs/xfs/scrub/trace.c6
-rw-r--r--fs/xfs/xfs_acl.c4
-rw-r--r--fs/xfs/xfs_aops.c121
-rw-r--r--fs/xfs/xfs_aops.h1
-rw-r--r--fs/xfs/xfs_attr_inactive.c7
-rw-r--r--fs/xfs/xfs_attr_list.c7
-rw-r--r--fs/xfs/xfs_bio_io.c61
-rw-r--r--fs/xfs/xfs_bmap_item.c350
-rw-r--r--fs/xfs/xfs_bmap_item.h2
-rw-r--r--fs/xfs/xfs_bmap_util.c11
-rw-r--r--fs/xfs/xfs_buf.c171
-rw-r--r--fs/xfs/xfs_buf.h53
-rw-r--r--fs/xfs/xfs_buf_item.c40
-rw-r--r--fs/xfs/xfs_buf_item.h6
-rw-r--r--fs/xfs/xfs_dir2_readdir.c5
-rw-r--r--fs/xfs/xfs_discard.c4
-rw-r--r--fs/xfs/xfs_dquot.c6
-rw-r--r--fs/xfs/xfs_dquot.h1
-rw-r--r--fs/xfs/xfs_dquot_item.c118
-rw-r--r--fs/xfs/xfs_dquot_item.h4
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_export.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c410
-rw-r--r--fs/xfs/xfs_extfree_item.h6
-rw-r--r--fs/xfs/xfs_file.c38
-rw-r--r--fs/xfs/xfs_filestream.c5
-rw-r--r--fs/xfs/xfs_fsmap.c4
-rw-r--r--fs/xfs/xfs_fsops.c8
-rw-r--r--fs/xfs/xfs_globals.c4
-rw-r--r--fs/xfs/xfs_health.c6
-rw-r--r--fs/xfs/xfs_icache.c4
-rw-r--r--fs/xfs/xfs_icreate_item.c75
-rw-r--r--fs/xfs/xfs_inode.c42
-rw-r--r--fs/xfs/xfs_inode_item.c16
-rw-r--r--fs/xfs/xfs_inode_item.h2
-rw-r--r--fs/xfs/xfs_ioctl.c448
-rw-r--r--fs/xfs/xfs_ioctl.h8
-rw-r--r--fs/xfs/xfs_ioctl32.c161
-rw-r--r--fs/xfs/xfs_ioctl32.h14
-rw-r--r--fs/xfs/xfs_iomap.c5
-rw-r--r--fs/xfs/xfs_iops.c10
-rw-r--r--fs/xfs/xfs_itable.c749
-rw-r--r--fs/xfs/xfs_itable.h106
-rw-r--r--fs/xfs/xfs_iwalk.c720
-rw-r--r--fs/xfs/xfs_iwalk.h46
-rw-r--r--fs/xfs/xfs_linux.h5
-rw-r--r--fs/xfs/xfs_log.c644
-rw-r--r--fs/xfs/xfs_log.h17
-rw-r--r--fs/xfs/xfs_log_cil.c51
-rw-r--r--fs/xfs/xfs_log_priv.h36
-rw-r--r--fs/xfs/xfs_log_recover.c463
-rw-r--r--fs/xfs/xfs_message.c2
-rw-r--r--fs/xfs/xfs_mount.c102
-rw-r--r--fs/xfs/xfs_mount.h22
-rw-r--r--fs/xfs/xfs_ondisk.h5
-rw-r--r--fs/xfs/xfs_pnfs.c9
-rw-r--r--fs/xfs/xfs_pwork.c136
-rw-r--r--fs/xfs/xfs_pwork.h61
-rw-r--r--fs/xfs/xfs_qm.c68
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c5
-rw-r--r--fs/xfs/xfs_quotaops.c3
-rw-r--r--fs/xfs/xfs_refcount_item.c357
-rw-r--r--fs/xfs/xfs_refcount_item.h2
-rw-r--r--fs/xfs/xfs_reflink.c15
-rw-r--r--fs/xfs/xfs_rmap_item.c380
-rw-r--r--fs/xfs/xfs_rmap_item.h2
-rw-r--r--fs/xfs/xfs_rtalloc.c6
-rw-r--r--fs/xfs/xfs_stats.c1
-rw-r--r--fs/xfs/xfs_super.c32
-rw-r--r--fs/xfs/xfs_super.h14
-rw-r--r--fs/xfs/xfs_symlink.c9
-rw-r--r--fs/xfs/xfs_sysctl.c3
-rw-r--r--fs/xfs/xfs_sysctl.h3
-rw-r--r--fs/xfs/xfs_sysfs.c42
-rw-r--r--fs/xfs/xfs_trace.c8
-rw-r--r--fs/xfs/xfs_trace.h61
-rw-r--r--fs/xfs/xfs_trans.c43
-rw-r--r--fs/xfs/xfs_trans.h70
-rw-r--r--fs/xfs/xfs_trans_ail.c53
-rw-r--r--fs/xfs/xfs_trans_bmap.c232
-rw-r--r--fs/xfs/xfs_trans_buf.c11
-rw-r--r--fs/xfs/xfs_trans_dquot.c11
-rw-r--r--fs/xfs/xfs_trans_extfree.c286
-rw-r--r--fs/xfs/xfs_trans_inode.c3
-rw-r--r--fs/xfs/xfs_trans_priv.h4
-rw-r--r--fs/xfs/xfs_trans_refcount.c240
-rw-r--r--fs/xfs/xfs_trans_rmap.c257
-rw-r--r--fs/xfs/xfs_xattr.c5
383 files changed, 9811 insertions, 8164 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index bc57ae9e2963..cce9ace651a2 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -35,8 +35,9 @@
* @page: structure to page
*
*/
-static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
+static int v9fs_fid_readpage(void *data, struct page *page)
{
+ struct p9_fid *fid = data;
struct inode *inode = page->mapping->host;
struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE};
struct iov_iter to;
@@ -107,7 +108,8 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
return ret;
- ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp);
+ ret = read_cache_pages(mapping, pages, v9fs_fid_readpage,
+ filp->private_data);
p9_debug(P9_DEBUG_VFS, " = %d\n", ret);
return ret;
}
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index f87ddd1b6d72..62dc4f577ba1 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -91,12 +91,28 @@ config BINFMT_SCRIPT
Most systems will not boot if you say M or N here. If unsure, say Y.
+config ARCH_HAS_BINFMT_FLAT
+ bool
+
config BINFMT_FLAT
bool "Kernel support for flat binaries"
- depends on !MMU || ARM || M68K
+ depends on ARCH_HAS_BINFMT_FLAT
help
Support uClinux FLAT format binaries.
+config BINFMT_FLAT_ARGVP_ENVP_ON_STACK
+ bool
+
+config BINFMT_FLAT_OLD_ALWAYS_RAM
+ bool
+
+config BINFMT_FLAT_OLD
+ bool "Enable support for very old legacy flat binaries"
+ depends on BINFMT_FLAT
+ help
+ Support decade old uClinux FLAT format binaries. Unless you know
+ you have some of those say N here.
+
config BINFMT_ZFLAT
bool "Enable ZFLAT support"
depends on BINFMT_FLAT
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index cbf31f6cd177..10359bea7070 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -29,7 +29,6 @@ kafs-y := \
server.o \
server_list.o \
super.o \
- netdevices.o \
vlclient.o \
vl_list.o \
vl_probe.o \
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 915010464572..6cdd7047c809 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -48,7 +48,7 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
refcount_set(&new->usage, 1);
new->sb = vnode->vfs_inode.i_sb;
new->vid = vnode->volume->vid;
- new->server = afs_get_server(server);
+ new->server = afs_get_server(server, afs_server_trace_get_new_cbi);
INIT_HLIST_NODE(&new->cb_vlink);
write_lock(&server->cb_break_lock);
@@ -195,7 +195,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
write_unlock(&cbi->server->cb_break_lock);
if (vi)
kfree_rcu(vi, rcu);
- afs_put_server(net, cbi->server);
+ afs_put_server(net, cbi->server, afs_server_trace_put_cbi);
}
kfree_rcu(cbi, rcu);
}
@@ -212,7 +212,7 @@ void afs_init_callback_state(struct afs_server *server)
/*
* actually break a callback
*/
-void __afs_break_callback(struct afs_vnode *vnode)
+void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
{
_enter("");
@@ -223,13 +223,17 @@ void __afs_break_callback(struct afs_vnode *vnode)
if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
afs_lock_may_be_available(vnode);
+
+ trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
+ } else {
+ trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false);
}
}
-void afs_break_callback(struct afs_vnode *vnode)
+void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
{
write_seqlock(&vnode->cb_lock);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, reason);
write_sequnlock(&vnode->cb_lock);
}
@@ -277,6 +281,8 @@ static void afs_break_one_callback(struct afs_server *server,
write_lock(&volume->cb_v_break_lock);
volume->cb_v_break++;
+ trace_afs_cb_break(fid, volume->cb_v_break,
+ afs_cb_break_for_volume_callback, false);
write_unlock(&volume->cb_v_break_lock);
} else {
data.volume = NULL;
@@ -285,8 +291,10 @@ static void afs_break_one_callback(struct afs_server *server,
afs_iget5_test, &data);
if (inode) {
vnode = AFS_FS_I(inode);
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_callback);
iput(inode);
+ } else {
+ trace_afs_cb_miss(fid, afs_cb_break_for_callback);
}
}
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3451be03667f..4f1b6f466ff5 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -256,8 +256,11 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
* server holds up change visibility till it receives our reply so as
* to maintain cache coherency.
*/
- if (call->server)
+ if (call->server) {
+ trace_afs_server(call->server, atomic_read(&call->server->usage),
+ afs_server_trace_callback);
afs_break_callbacks(call->server, call->count, call->request);
+ }
afs_send_empty_reply(call);
afs_put_call(call);
@@ -580,9 +583,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
*/
static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
{
- struct afs_interface *ifs;
struct afs_call *call = container_of(work, struct afs_call, work);
- int loop, nifs;
+ int loop;
struct {
struct /* InterfaceAddr */ {
@@ -600,19 +602,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
_enter("");
- nifs = 0;
- ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL);
- if (ifs) {
- nifs = afs_get_ipv4_interfaces(call->net, ifs, 32, false);
- if (nifs < 0) {
- kfree(ifs);
- ifs = NULL;
- nifs = 0;
- }
- }
-
memset(&reply, 0, sizeof(reply));
- reply.ia.nifs = htonl(nifs);
reply.ia.uuid[0] = call->net->uuid.time_low;
reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid));
@@ -622,15 +612,6 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
for (loop = 0; loop < 6; loop++)
reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]);
- if (ifs) {
- for (loop = 0; loop < nifs; loop++) {
- reply.ia.ifaddr[loop] = ifs[loop].address.s_addr;
- reply.ia.netmask[loop] = ifs[loop].netmask.s_addr;
- reply.ia.mtu[loop] = htonl(ifs[loop].mtu);
- }
- kfree(ifs);
- }
-
reply.cap.capcount = htonl(1);
reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION);
afs_send_simple_reply(call, &reply, sizeof(reply));
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index da9563d62b32..e640d67274be 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -238,8 +238,7 @@ retry:
if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
nr_inline = 0;
- req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline,
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);
@@ -1363,12 +1362,12 @@ static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry,
drop_nlink(&vnode->vfs_inode);
if (vnode->vfs_inode.i_nlink == 0) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, afs_cb_break_for_unlink);
}
write_sequnlock(&vnode->cb_lock);
ret = 0;
} else {
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_unlink);
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
kdebug("AFS_VNODE_DELETED");
@@ -1390,7 +1389,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
{
struct afs_fs_cursor fc;
struct afs_status_cb *scb;
- struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
bool need_rehash = false;
int ret;
@@ -1413,15 +1413,12 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
}
/* Try to make sure we have a callback promise on the victim. */
- if (d_really_is_positive(dentry)) {
- vnode = AFS_FS_I(d_inode(dentry));
- ret = afs_validate(vnode, key);
- if (ret < 0)
- goto error_key;
- }
+ ret = afs_validate(vnode, key);
+ if (ret < 0)
+ goto error_key;
spin_lock(&dentry->d_lock);
- if (vnode && d_count(dentry) > 1) {
+ if (d_count(dentry) > 1) {
spin_unlock(&dentry->d_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(d_inode(dentry), 0);
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index 057b8d322422..361088a5edb9 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -60,11 +60,6 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_edit_dir_add(dvnode, &new->d_name,
&vnode->fid, afs_edit_dir_for_silly_1);
-
- /* vfs_unlink and the like do not issue this when a file is
- * sillyrenamed, so do it here.
- */
- fsnotify_nameremove(old, 0);
}
kfree(scb);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 8fd7d3b9a1b1..56b69576274d 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -310,8 +310,7 @@ int afs_page_filler(void *data, struct page *page)
/* fall through */
default:
go_on:
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
if (!req)
goto enomem;
@@ -461,8 +460,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
n++;
}
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *) * n,
- GFP_NOFS);
+ req = kzalloc(struct_size(req, array, n), GFP_NOFS);
if (!req)
return -ENOMEM;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index a1ef0266422a..1ce73e014139 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1911,7 +1911,7 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net,
return ERR_PTR(-ENOMEM);
call->key = key;
- call->server = afs_get_server(server);
+ call->server = afs_get_server(server, afs_server_trace_get_caps);
call->server_index = server_index;
call->upgrade = true;
call->async = true;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 18a50d4febcf..7b1c18c32f48 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -283,7 +283,7 @@ void afs_vnode_commit_status(struct afs_fs_cursor *fc,
if (scb->status.abort_code == VNOVNODE) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
clear_nlink(&vnode->vfs_inode);
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, afs_cb_break_for_deleted);
}
} else {
if (scb->have_status)
@@ -594,8 +594,9 @@ bool afs_check_validity(struct afs_vnode *vnode)
struct afs_cb_interest *cbi;
struct afs_server *server;
struct afs_volume *volume = vnode->volume;
+ enum afs_cb_break_reason need_clear = afs_cb_break_no_break;
time64_t now = ktime_get_real_seconds();
- bool valid, need_clear = false;
+ bool valid;
unsigned int cb_break, cb_s_break, cb_v_break;
int seq = 0;
@@ -613,13 +614,13 @@ bool afs_check_validity(struct afs_vnode *vnode)
vnode->cb_v_break != cb_v_break) {
vnode->cb_s_break = cb_s_break;
vnode->cb_v_break = cb_v_break;
- need_clear = true;
+ need_clear = afs_cb_break_for_vsbreak;
valid = false;
} else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- need_clear = true;
+ need_clear = afs_cb_break_for_zap;
valid = false;
} else if (vnode->cb_expires_at - 10 <= now) {
- need_clear = true;
+ need_clear = afs_cb_break_for_lapsed;
valid = false;
} else {
valid = true;
@@ -635,10 +636,12 @@ bool afs_check_validity(struct afs_vnode *vnode)
done_seqretry(&vnode->cb_lock, seq);
- if (need_clear) {
+ if (need_clear != afs_cb_break_no_break) {
write_seqlock(&vnode->cb_lock);
if (cb_break == vnode->cb_break)
- __afs_break_callback(vnode);
+ __afs_break_callback(vnode, need_clear);
+ else
+ trace_afs_cb_miss(&vnode->fid, need_clear);
write_sequnlock(&vnode->cb_lock);
valid = false;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7ee63526c6a2..f66a3be12fd6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -514,6 +514,7 @@ struct afs_server {
atomic_t usage;
u32 addr_version; /* Address list version */
u32 cm_epoch; /* Server RxRPC epoch */
+ unsigned int debug_id; /* Debugging ID for traces */
/* file service access */
rwlock_t fs_lock; /* access lock */
@@ -719,15 +720,6 @@ struct afs_permits {
};
/*
- * record of one of a system's set of network interfaces
- */
-struct afs_interface {
- struct in_addr address; /* IPv4 address bound to interface */
- struct in_addr netmask; /* netmask applied to address */
- unsigned mtu; /* MTU of interface */
-};
-
-/*
* Error prioritisation and accumulation.
*/
struct afs_error {
@@ -844,9 +836,9 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
* callback.c
*/
extern void afs_init_callback_state(struct afs_server *);
-extern void __afs_break_callback(struct afs_vnode *);
-extern void afs_break_callback(struct afs_vnode *);
-extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
+extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
+extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *);
extern int afs_register_server_cb_interest(struct afs_vnode *,
struct afs_server_list *, unsigned int);
@@ -1090,12 +1082,6 @@ extern struct vfsmount *afs_d_automount(struct path *);
extern void afs_mntpt_kill_timer(void);
/*
- * netdevices.c
- */
-extern int afs_get_ipv4_interfaces(struct afs_net *, struct afs_interface *,
- size_t, bool);
-
-/*
* proc.c
*/
#ifdef CONFIG_PROC_FS
@@ -1240,17 +1226,12 @@ extern void __exit afs_clean_up_permit_cache(void);
*/
extern spinlock_t afs_server_peer_lock;
-static inline struct afs_server *afs_get_server(struct afs_server *server)
-{
- atomic_inc(&server->usage);
- return server;
-}
-
extern struct afs_server *afs_find_server(struct afs_net *,
const struct sockaddr_rxrpc *);
extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *);
-extern void afs_put_server(struct afs_net *, struct afs_server *);
+extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
+extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
extern void afs_manage_servers(struct work_struct *);
extern void afs_servers_timer(struct timer_list *);
extern void __net_exit afs_purge_servers(struct afs_net *);
@@ -1434,7 +1415,7 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
{
if (fc->ac.error == -ENOENT) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- afs_break_callback(vnode);
+ afs_break_callback(vnode, afs_cb_break_for_deleted);
}
}
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 5497ab38f585..52b19e9c1535 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include "internal.h"
#include "afs_fs.h"
+#include "protocol_uae.h"
/*
* convert an AFS abort code to a Linux error number
@@ -65,34 +66,25 @@ int afs_abort_to_error(u32 abort_code)
case AFSVL_PERM: return -EACCES;
case AFSVL_NOMEM: return -EREMOTEIO;
- /* Unified AFS error table; ET "uae" == 0x2f6df00 */
- case 0x2f6df00: return -EPERM;
- case 0x2f6df01: return -ENOENT;
- case 0x2f6df04: return -EIO;
- case 0x2f6df0a: return -EAGAIN;
- case 0x2f6df0b: return -ENOMEM;
- case 0x2f6df0c: return -EACCES;
- case 0x2f6df0f: return -EBUSY;
- case 0x2f6df10: return -EEXIST;
- case 0x2f6df11: return -EXDEV;
- case 0x2f6df12: return -ENODEV;
- case 0x2f6df13: return -ENOTDIR;
- case 0x2f6df14: return -EISDIR;
- case 0x2f6df15: return -EINVAL;
- case 0x2f6df1a: return -EFBIG;
- case 0x2f6df1b: return -ENOSPC;
- case 0x2f6df1d: return -EROFS;
- case 0x2f6df1e: return -EMLINK;
- case 0x2f6df20: return -EDOM;
- case 0x2f6df21: return -ERANGE;
- case 0x2f6df22: return -EDEADLK;
- case 0x2f6df23: return -ENAMETOOLONG;
- case 0x2f6df24: return -ENOLCK;
- case 0x2f6df26: return -ENOTEMPTY;
- case 0x2f6df28: return -EWOULDBLOCK;
- case 0x2f6df69: return -ENOTCONN;
- case 0x2f6df6c: return -ETIMEDOUT;
- case 0x2f6df78: return -EDQUOT;
+ /* Unified AFS error table */
+ case UAEPERM: return -EPERM;
+ case UAENOENT: return -ENOENT;
+ case UAEACCES: return -EACCES;
+ case UAEBUSY: return -EBUSY;
+ case UAEEXIST: return -EEXIST;
+ case UAENOTDIR: return -ENOTDIR;
+ case UAEISDIR: return -EISDIR;
+ case UAEFBIG: return -EFBIG;
+ case UAENOSPC: return -ENOSPC;
+ case UAEROFS: return -EROFS;
+ case UAEMLINK: return -EMLINK;
+ case UAEDEADLK: return -EDEADLK;
+ case UAENAMETOOLONG: return -ENAMETOOLONG;
+ case UAENOLCK: return -ENOLCK;
+ case UAENOTEMPTY: return -ENOTEMPTY;
+ case UAELOOP: return -ELOOP;
+ case UAENOMEDIUM: return -ENOMEDIUM;
+ case UAEDQUOT: return -EDQUOT;
/* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
case RXKADINCONSISTENCY: return -EPROTO;
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
deleted file mode 100644
index 2a009d1939d7..000000000000
--- a/fs/afs/netdevices.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* AFS network device helpers
- *
- * Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
- */
-
-#include <linux/string.h>
-#include <linux/rtnetlink.h>
-#include <linux/inetdevice.h>
-#include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <net/net_namespace.h>
-#include "internal.h"
-
-/*
- * get a list of this system's interface IPv4 addresses, netmasks and MTUs
- * - maxbufs must be at least 1
- * - returns the number of interface records in the buffer
- */
-int afs_get_ipv4_interfaces(struct afs_net *net, struct afs_interface *bufs,
- size_t maxbufs, bool wantloopback)
-{
- struct net_device *dev;
- struct in_device *idev;
- int n = 0;
-
- ASSERT(maxbufs > 0);
-
- rtnl_lock();
- for_each_netdev(net->net, dev) {
- if (dev->type == ARPHRD_LOOPBACK && !wantloopback)
- continue;
- idev = __in_dev_get_rtnl(dev);
- if (!idev)
- continue;
- for_primary_ifa(idev) {
- bufs[n].address.s_addr = ifa->ifa_address;
- bufs[n].netmask.s_addr = ifa->ifa_mask;
- bufs[n].mtu = dev->mtu;
- n++;
- if (n >= maxbufs)
- goto out;
- } endfor_ifa(idev);
- }
-out:
- rtnl_unlock();
- return n;
-}
diff --git a/fs/afs/protocol_uae.h b/fs/afs/protocol_uae.h
new file mode 100644
index 000000000000..1b3d1060bd34
--- /dev/null
+++ b/fs/afs/protocol_uae.h
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Universal AFS Error codes (UAE).
+ *
+ * Copyright (C) 2003, Daria Phoebe Brashear
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ */
+
+enum {
+ UAEPERM = 0x2f6df00, /* Operation not permitted */
+ UAENOENT = 0x2f6df01, /* No such file or directory */
+ UAESRCH = 0x2f6df02, /* No such process */
+ UAEINTR = 0x2f6df03, /* Interrupted system call */
+ UAEIO = 0x2f6df04, /* I/O error */
+ UAENXIO = 0x2f6df05, /* No such device or address */
+ UAE2BIG = 0x2f6df06, /* Arg list too long */
+ UAENOEXEC = 0x2f6df07, /* Exec format error */
+ UAEBADF = 0x2f6df08, /* Bad file number */
+ UAECHILD = 0x2f6df09, /* No child processes */
+ UAEAGAIN = 0x2f6df0a, /* Try again */
+ UAENOMEM = 0x2f6df0b, /* Out of memory */
+ UAEACCES = 0x2f6df0c, /* Permission denied */
+ UAEFAULT = 0x2f6df0d, /* Bad address */
+ UAENOTBLK = 0x2f6df0e, /* Block device required */
+ UAEBUSY = 0x2f6df0f, /* Device or resource busy */
+ UAEEXIST = 0x2f6df10, /* File exists */
+ UAEXDEV = 0x2f6df11, /* Cross-device link */
+ UAENODEV = 0x2f6df12, /* No such device */
+ UAENOTDIR = 0x2f6df13, /* Not a directory */
+ UAEISDIR = 0x2f6df14, /* Is a directory */
+ UAEINVAL = 0x2f6df15, /* Invalid argument */
+ UAENFILE = 0x2f6df16, /* File table overflow */
+ UAEMFILE = 0x2f6df17, /* Too many open files */
+ UAENOTTY = 0x2f6df18, /* Not a typewriter */
+ UAETXTBSY = 0x2f6df19, /* Text file busy */
+ UAEFBIG = 0x2f6df1a, /* File too large */
+ UAENOSPC = 0x2f6df1b, /* No space left on device */
+ UAESPIPE = 0x2f6df1c, /* Illegal seek */
+ UAEROFS = 0x2f6df1d, /* Read-only file system */
+ UAEMLINK = 0x2f6df1e, /* Too many links */
+ UAEPIPE = 0x2f6df1f, /* Broken pipe */
+ UAEDOM = 0x2f6df20, /* Math argument out of domain of func */
+ UAERANGE = 0x2f6df21, /* Math result not representable */
+ UAEDEADLK = 0x2f6df22, /* Resource deadlock would occur */
+ UAENAMETOOLONG = 0x2f6df23, /* File name too long */
+ UAENOLCK = 0x2f6df24, /* No record locks available */
+ UAENOSYS = 0x2f6df25, /* Function not implemented */
+ UAENOTEMPTY = 0x2f6df26, /* Directory not empty */
+ UAELOOP = 0x2f6df27, /* Too many symbolic links encountered */
+ UAEWOULDBLOCK = 0x2f6df28, /* Operation would block */
+ UAENOMSG = 0x2f6df29, /* No message of desired type */
+ UAEIDRM = 0x2f6df2a, /* Identifier removed */
+ UAECHRNG = 0x2f6df2b, /* Channel number out of range */
+ UAEL2NSYNC = 0x2f6df2c, /* Level 2 not synchronized */
+ UAEL3HLT = 0x2f6df2d, /* Level 3 halted */
+ UAEL3RST = 0x2f6df2e, /* Level 3 reset */
+ UAELNRNG = 0x2f6df2f, /* Link number out of range */
+ UAEUNATCH = 0x2f6df30, /* Protocol driver not attached */
+ UAENOCSI = 0x2f6df31, /* No CSI structure available */
+ UAEL2HLT = 0x2f6df32, /* Level 2 halted */
+ UAEBADE = 0x2f6df33, /* Invalid exchange */
+ UAEBADR = 0x2f6df34, /* Invalid request descriptor */
+ UAEXFULL = 0x2f6df35, /* Exchange full */
+ UAENOANO = 0x2f6df36, /* No anode */
+ UAEBADRQC = 0x2f6df37, /* Invalid request code */
+ UAEBADSLT = 0x2f6df38, /* Invalid slot */
+ UAEBFONT = 0x2f6df39, /* Bad font file format */
+ UAENOSTR = 0x2f6df3a, /* Device not a stream */
+ UAENODATA = 0x2f6df3b, /* No data available */
+ UAETIME = 0x2f6df3c, /* Timer expired */
+ UAENOSR = 0x2f6df3d, /* Out of streams resources */
+ UAENONET = 0x2f6df3e, /* Machine is not on the network */
+ UAENOPKG = 0x2f6df3f, /* Package not installed */
+ UAEREMOTE = 0x2f6df40, /* Object is remote */
+ UAENOLINK = 0x2f6df41, /* Link has been severed */
+ UAEADV = 0x2f6df42, /* Advertise error */
+ UAESRMNT = 0x2f6df43, /* Srmount error */
+ UAECOMM = 0x2f6df44, /* Communication error on send */
+ UAEPROTO = 0x2f6df45, /* Protocol error */
+ UAEMULTIHOP = 0x2f6df46, /* Multihop attempted */
+ UAEDOTDOT = 0x2f6df47, /* RFS specific error */
+ UAEBADMSG = 0x2f6df48, /* Not a data message */
+ UAEOVERFLOW = 0x2f6df49, /* Value too large for defined data type */
+ UAENOTUNIQ = 0x2f6df4a, /* Name not unique on network */
+ UAEBADFD = 0x2f6df4b, /* File descriptor in bad state */
+ UAEREMCHG = 0x2f6df4c, /* Remote address changed */
+ UAELIBACC = 0x2f6df4d, /* Can not access a needed shared library */
+ UAELIBBAD = 0x2f6df4e, /* Accessing a corrupted shared library */
+ UAELIBSCN = 0x2f6df4f, /* .lib section in a.out corrupted */
+ UAELIBMAX = 0x2f6df50, /* Attempting to link in too many shared libraries */
+ UAELIBEXEC = 0x2f6df51, /* Cannot exec a shared library directly */
+ UAEILSEQ = 0x2f6df52, /* Illegal byte sequence */
+ UAERESTART = 0x2f6df53, /* Interrupted system call should be restarted */
+ UAESTRPIPE = 0x2f6df54, /* Streams pipe error */
+ UAEUSERS = 0x2f6df55, /* Too many users */
+ UAENOTSOCK = 0x2f6df56, /* Socket operation on non-socket */
+ UAEDESTADDRREQ = 0x2f6df57, /* Destination address required */
+ UAEMSGSIZE = 0x2f6df58, /* Message too long */
+ UAEPROTOTYPE = 0x2f6df59, /* Protocol wrong type for socket */
+ UAENOPROTOOPT = 0x2f6df5a, /* Protocol not available */
+ UAEPROTONOSUPPORT = 0x2f6df5b, /* Protocol not supported */
+ UAESOCKTNOSUPPORT = 0x2f6df5c, /* Socket type not supported */
+ UAEOPNOTSUPP = 0x2f6df5d, /* Operation not supported on transport endpoint */
+ UAEPFNOSUPPORT = 0x2f6df5e, /* Protocol family not supported */
+ UAEAFNOSUPPORT = 0x2f6df5f, /* Address family not supported by protocol */
+ UAEADDRINUSE = 0x2f6df60, /* Address already in use */
+ UAEADDRNOTAVAIL = 0x2f6df61, /* Cannot assign requested address */
+ UAENETDOWN = 0x2f6df62, /* Network is down */
+ UAENETUNREACH = 0x2f6df63, /* Network is unreachable */
+ UAENETRESET = 0x2f6df64, /* Network dropped connection because of reset */
+ UAECONNABORTED = 0x2f6df65, /* Software caused connection abort */
+ UAECONNRESET = 0x2f6df66, /* Connection reset by peer */
+ UAENOBUFS = 0x2f6df67, /* No buffer space available */
+ UAEISCONN = 0x2f6df68, /* Transport endpoint is already connected */
+ UAENOTCONN = 0x2f6df69, /* Transport endpoint is not connected */
+ UAESHUTDOWN = 0x2f6df6a, /* Cannot send after transport endpoint shutdown */
+ UAETOOMANYREFS = 0x2f6df6b, /* Too many references: cannot splice */
+ UAETIMEDOUT = 0x2f6df6c, /* Connection timed out */
+ UAECONNREFUSED = 0x2f6df6d, /* Connection refused */
+ UAEHOSTDOWN = 0x2f6df6e, /* Host is down */
+ UAEHOSTUNREACH = 0x2f6df6f, /* No route to host */
+ UAEALREADY = 0x2f6df70, /* Operation already in progress */
+ UAEINPROGRESS = 0x2f6df71, /* Operation now in progress */
+ UAESTALE = 0x2f6df72, /* Stale NFS file handle */
+ UAEUCLEAN = 0x2f6df73, /* Structure needs cleaning */
+ UAENOTNAM = 0x2f6df74, /* Not a XENIX named type file */
+ UAENAVAIL = 0x2f6df75, /* No XENIX semaphores available */
+ UAEISNAM = 0x2f6df76, /* Is a named type file */
+ UAEREMOTEIO = 0x2f6df77, /* Remote I/O error */
+ UAEDQUOT = 0x2f6df78, /* Quota exceeded */
+ UAENOMEDIUM = 0x2f6df79, /* No medium found */
+ UAEMEDIUMTYPE = 0x2f6df7a, /* Wrong medium type */
+};
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d1dde2834b6d..0e5269374ac1 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -184,7 +184,7 @@ void afs_put_call(struct afs_call *call)
if (call->type->destructor)
call->type->destructor(call);
- afs_put_server(call->net, call->server);
+ afs_put_server(call->net, call->server, afs_server_trace_put_call);
afs_put_cb_interest(call->net, call->cbi);
afs_put_addrlist(call->alist);
kfree(call->request);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8866703b2e6c..71e71c07568f 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -28,7 +28,7 @@ struct key *afs_request_key(struct afs_cell *cell)
_debug("key %s", cell->anonymous_key->description);
key = request_key(&key_type_rxrpc, cell->anonymous_key->description,
- NULL, NULL);
+ NULL);
if (IS_ERR(key)) {
if (PTR_ERR(key) != -ENOKEY) {
_leave(" = %ld", PTR_ERR(key));
diff --git a/fs/afs/server.c b/fs/afs/server.c
index e900cd74361b..64d440aaabc0 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -13,6 +13,7 @@
static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */
+static atomic_t afs_server_debug_id;
static void afs_inc_servers_outstanding(struct afs_net *net)
{
@@ -47,7 +48,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
do {
if (server)
- afs_put_server(net, server);
+ afs_put_server(net, server, afs_server_trace_put_find_rsq);
server = NULL;
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
@@ -112,7 +113,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
* changes.
*/
if (server)
- afs_put_server(net, server);
+ afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
server = NULL;
read_seqbegin_or_lock(&net->fs_lock, &seq);
@@ -127,7 +128,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
} else if (diff > 0) {
p = p->rb_right;
} else {
- afs_get_server(server);
+ afs_get_server(server, afs_server_trace_get_by_uuid);
break;
}
@@ -198,7 +199,7 @@ static struct afs_server *afs_install_server(struct afs_net *net,
ret = 0;
exists:
- afs_get_server(server);
+ afs_get_server(server, afs_server_trace_get_install);
write_sequnlock(&net->fs_lock);
return server;
}
@@ -219,6 +220,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
goto enomem;
atomic_set(&server->usage, 1);
+ server->debug_id = atomic_inc_return(&afs_server_debug_id);
RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
@@ -230,6 +232,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
spin_lock_init(&server->probe_lock);
afs_inc_servers_outstanding(net);
+ trace_afs_server(server, 1, afs_server_trace_alloc);
_leave(" = %p", server);
return server;
@@ -325,9 +328,22 @@ void afs_servers_timer(struct timer_list *timer)
}
/*
+ * Get a reference on a server object.
+ */
+struct afs_server *afs_get_server(struct afs_server *server,
+ enum afs_server_trace reason)
+{
+ unsigned int u = atomic_inc_return(&server->usage);
+
+ trace_afs_server(server, u, reason);
+ return server;
+}
+
+/*
* Release a reference on a server record.
*/
-void afs_put_server(struct afs_net *net, struct afs_server *server)
+void afs_put_server(struct afs_net *net, struct afs_server *server,
+ enum afs_server_trace reason)
{
unsigned int usage;
@@ -338,7 +354,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server)
usage = atomic_dec_return(&server->usage);
- _enter("{%u}", usage);
+ trace_afs_server(server, usage, reason);
if (likely(usage > 0))
return;
@@ -350,6 +366,8 @@ static void afs_server_rcu(struct rcu_head *rcu)
{
struct afs_server *server = container_of(rcu, struct afs_server, rcu);
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_free);
afs_put_addrlist(rcu_access_pointer(server->addresses));
kfree(server);
}
@@ -365,7 +383,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
.index = alist->preferred,
.error = 0,
};
- _enter("%p", server);
+
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_give_up_cb);
if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
@@ -373,6 +393,8 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
wait_var_event(&server->probe_outstanding,
atomic_read(&server->probe_outstanding) == 0);
+ trace_afs_server(server, atomic_read(&server->usage),
+ afs_server_trace_destroy);
call_rcu(&server->rcu, afs_server_rcu);
afs_dec_servers_outstanding(net);
}
@@ -392,6 +414,7 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
write_seqlock(&net->fs_lock);
usage = 1;
deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
+ trace_afs_server(server, usage, afs_server_trace_gc);
if (deleted) {
rb_erase(&server->uuid_rb, &net->fs_servers);
hlist_del_rcu(&server->proc_link);
@@ -514,6 +537,8 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a
_enter("");
+ trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);
+
alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
&server->uuid);
if (IS_ERR(alist)) {
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index b4988bc8e6f2..888d91d195d9 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -16,7 +16,8 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
if (slist && refcount_dec_and_test(&slist->usage)) {
for (i = 0; i < slist->nr_servers; i++) {
afs_put_cb_interest(net, slist->servers[i].cb_interest);
- afs_put_server(net, slist->servers[i].server);
+ afs_put_server(net, slist->servers[i].server,
+ afs_server_trace_put_slist);
}
kfree(slist);
}
@@ -67,7 +68,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
break;
if (j < slist->nr_servers) {
if (slist->servers[j].server == server) {
- afs_put_server(cell->net, server);
+ afs_put_server(cell->net, server,
+ afs_server_trace_put_slist_isort);
continue;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 98eb7adbce91..cb76566763db 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -44,8 +44,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
return 0;
}
- req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
- GFP_KERNEL);
+ req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
if (!req)
return -ENOMEM;
diff --git a/fs/aio.c b/fs/aio.c
index c1e581dd32f5..2d405733a8c6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1479,8 +1479,9 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
return 0;
}
-static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
- bool vectored, bool compat, struct iov_iter *iter)
+static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
+ struct iovec **iovec, bool vectored, bool compat,
+ struct iov_iter *iter)
{
void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
size_t len = iocb->aio_nbytes;
@@ -1537,7 +1538,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
return -EINVAL;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
- if (ret)
+ if (ret < 0)
return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
@@ -1565,7 +1566,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
return -EINVAL;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
- if (ret)
+ if (ret < 0)
return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e4b59e76afb0..8c6b50f34466 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -42,6 +42,11 @@
#include <asm/unaligned.h>
#include <asm/cacheflush.h>
#include <asm/page.h>
+#include <asm/flat.h>
+
+#ifndef flat_get_relocate_addr
+#define flat_get_relocate_addr(rel) (rel)
+#endif
/****************************************************************************/
@@ -63,6 +68,12 @@
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
+#ifdef CONFIG_BINFMT_SHARED_FLAT
+#define MAX_SHARED_LIBS (4)
+#else
+#define MAX_SHARED_LIBS (1)
+#endif
+
struct lib_info {
struct {
unsigned long start_code; /* Start of text segment */
@@ -120,14 +131,15 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start
sp -= bprm->envc + 1;
sp -= bprm->argc + 1;
- sp -= flat_argvp_envp_on_stack() ? 2 : 0;
+ if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK))
+ sp -= 2; /* argvp + envp */
sp -= 1; /* &argc */
current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN;
sp = (unsigned long __user *)current->mm->start_stack;
__put_user(bprm->argc, sp++);
- if (flat_argvp_envp_on_stack()) {
+ if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK)) {
unsigned long argv, envp;
argv = (unsigned long)(sp + 2);
envp = (unsigned long)(sp + 2 + bprm->argc + 1);
@@ -345,7 +357,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
start_code = p->lib_list[id].start_code;
text_len = p->lib_list[id].text_len;
- if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
+ if (r > start_brk - start_data + text_len) {
pr_err("reloc outside program 0x%lx (0 - 0x%lx/0x%lx)",
r, start_brk-start_data+text_len, text_len);
goto failed;
@@ -368,6 +380,7 @@ failed:
/****************************************************************************/
+#ifdef CONFIG_BINFMT_FLAT_OLD
static void old_reloc(unsigned long rl)
{
static const char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" };
@@ -405,6 +418,7 @@ static void old_reloc(unsigned long rl)
pr_debug("Relocation became %lx\n", val);
}
+#endif /* CONFIG_BINFMT_FLAT_OLD */
/****************************************************************************/
@@ -415,7 +429,8 @@ static int load_flat_file(struct linux_binprm *bprm,
unsigned long textpos, datapos, realdatastart;
u32 text_len, data_len, bss_len, stack_len, full_data, flags;
unsigned long len, memp, memp_size, extra, rlim;
- u32 __user *reloc, *rp;
+ __be32 __user *reloc;
+ u32 __user *rp;
struct inode *inode;
int i, rev, relocs;
loff_t fpos;
@@ -454,6 +469,7 @@ static int load_flat_file(struct linux_binprm *bprm,
if (flags & FLAT_FLAG_KTRACE)
pr_info("Loading file: %s\n", bprm->filename);
+#ifdef CONFIG_BINFMT_FLAT_OLD
if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) {
pr_err("bad flat file version 0x%x (supported 0x%lx and 0x%lx)\n",
rev, FLAT_VERSION, OLD_FLAT_VERSION);
@@ -470,6 +486,23 @@ static int load_flat_file(struct linux_binprm *bprm,
}
/*
+ * fix up the flags for the older format, there were all kinds
+ * of endian hacks, this only works for the simple cases
+ */
+ if (rev == OLD_FLAT_VERSION &&
+ (flags || IS_ENABLED(CONFIG_BINFMT_FLAT_OLD_ALWAYS_RAM)))
+ flags = FLAT_FLAG_RAM;
+
+#else /* CONFIG_BINFMT_FLAT_OLD */
+ if (rev != FLAT_VERSION) {
+ pr_err("bad flat file version 0x%x (supported 0x%lx)\n",
+ rev, FLAT_VERSION);
+ ret = -ENOEXEC;
+ goto err;
+ }
+#endif /* !CONFIG_BINFMT_FLAT_OLD */
+
+ /*
* Make sure the header params are sane.
* 28 bits (256 MB) is way more than reasonable in this case.
* If some top bits are set we have probable binary corruption.
@@ -480,13 +513,6 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- /*
- * fix up the flags for the older format, there were all kinds
- * of endian hacks, this only works for the simple cases
- */
- if (rev == OLD_FLAT_VERSION && flat_old_ram_flag(flags))
- flags = FLAT_FLAG_RAM;
-
#ifndef CONFIG_BINFMT_ZFLAT
if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) {
pr_err("Support for ZFLAT executables is not enabled.\n");
@@ -547,7 +573,7 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
+ len = data_len + extra;
len = PAGE_ALIGN(len);
realdatastart = vm_mmap(NULL, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
@@ -561,9 +587,7 @@ static int load_flat_file(struct linux_binprm *bprm,
vm_munmap(textpos, text_len);
goto err;
}
- datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(unsigned long),
- FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
pr_debug("Allocated data+bss+stack (%u bytes): %lx\n",
data_len + bss_len + stack_len, datapos);
@@ -587,13 +611,13 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- reloc = (u32 __user *)
+ reloc = (__be32 __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
memp = realdatastart;
memp_size = len;
} else {
- len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
+ len = text_len + data_len + extra;
len = PAGE_ALIGN(len);
textpos = vm_mmap(NULL, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
@@ -608,11 +632,9 @@ static int load_flat_file(struct linux_binprm *bprm,
}
realdatastart = textpos + ntohl(hdr->data_start);
- datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(u32),
- FLAT_DATA_ALIGN);
+ datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN);
- reloc = (u32 __user *)
+ reloc = (__be32 __user *)
(datapos + (ntohl(hdr->reloc_start) - text_len));
memp = textpos;
memp_size = len;
@@ -627,8 +649,9 @@ static int load_flat_file(struct linux_binprm *bprm,
(text_len + full_data
- sizeof(struct flat_hdr)),
0);
- memmove((void *) datapos, (void *) realdatastart,
- full_data);
+ if (datapos != realdatastart)
+ memmove((void *)datapos, (void *)realdatastart,
+ full_data);
#else
/*
* This is used on MMU systems mainly for testing.
@@ -684,8 +707,7 @@ static int load_flat_file(struct linux_binprm *bprm,
if (IS_ERR_VALUE(result)) {
ret = result;
pr_err("Unable to read code+data+bss, errno %d\n", ret);
- vm_munmap(textpos, text_len + data_len + extra +
- MAX_SHARED_LIBS * sizeof(u32));
+ vm_munmap(textpos, text_len + data_len + extra);
goto err;
}
}
@@ -775,20 +797,18 @@ static int load_flat_file(struct linux_binprm *bprm,
* __start to address 4 so that is okay).
*/
if (rev > OLD_FLAT_VERSION) {
- u32 __maybe_unused persistent = 0;
for (i = 0; i < relocs; i++) {
u32 addr, relval;
+ __be32 tmp;
/*
* Get the address of the pointer to be
* relocated (of course, the address has to be
* relocated first).
*/
- if (get_user(relval, reloc + i))
+ if (get_user(tmp, reloc + i))
return -EFAULT;
- relval = ntohl(relval);
- if (flat_set_persistent(relval, &persistent))
- continue;
+ relval = ntohl(tmp);
addr = flat_get_relocate_addr(relval);
rp = (u32 __user *)calc_reloc(addr, libinfo, id, 1);
if (rp == (u32 __user *)RELOC_FAILED) {
@@ -797,8 +817,7 @@ static int load_flat_file(struct linux_binprm *bprm,
}
/* Get the pointer's value. */
- ret = flat_get_addr_from_rp(rp, relval, flags,
- &addr, &persistent);
+ ret = flat_get_addr_from_rp(rp, relval, flags, &addr);
if (unlikely(ret))
goto err;
@@ -807,8 +826,13 @@ static int load_flat_file(struct linux_binprm *bprm,
* Do the relocation. PIC relocs in the data section are
* already in target order
*/
- if ((flags & FLAT_FLAG_GOTPIC) == 0)
- addr = ntohl(addr);
+ if ((flags & FLAT_FLAG_GOTPIC) == 0) {
+ /*
+ * Meh, the same value can have a different
+ * byte order based on a flag..
+ */
+ addr = ntohl((__force __be32)addr);
+ }
addr = calc_reloc(addr, libinfo, id, 0);
if (addr == RELOC_FAILED) {
ret = -ENOEXEC;
@@ -821,14 +845,15 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
}
+#ifdef CONFIG_BINFMT_FLAT_OLD
} else {
for (i = 0; i < relocs; i++) {
- u32 relval;
+ __be32 relval;
if (get_user(relval, reloc + i))
return -EFAULT;
- relval = ntohl(relval);
- old_reloc(relval);
+ old_reloc(ntohl(relval));
}
+#endif /* CONFIG_BINFMT_FLAT_OLD */
}
flush_icache_range(start_code, end_code);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2a1be0d1a698..cfeff1b8dce0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -187,7 +187,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
- unsigned int fsflags;
+ unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
u32 binode_flags = binode->flags;
@@ -212,13 +212,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
inode_lock(inode);
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
- if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) &
- (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- ret = -EPERM;
- goto out_unlock;
- }
- }
+ old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+ ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
+ if (ret)
+ goto out_unlock;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
@@ -376,9 +373,7 @@ static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
struct btrfs_inode *binode = BTRFS_I(file_inode(file));
struct fsxattr fa;
- memset(&fa, 0, sizeof(fa));
- fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
-
+ simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags));
if (copy_to_user(arg, &fa, sizeof(fa)))
return -EFAULT;
@@ -391,7 +386,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
- struct fsxattr fa;
+ struct fsxattr fa, old_fa;
unsigned old_flags;
unsigned old_i_flags;
int ret = 0;
@@ -402,7 +397,6 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
if (btrfs_root_readonly(root))
return -EROFS;
- memset(&fa, 0, sizeof(fa));
if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;
@@ -422,13 +416,11 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
old_flags = binode->flags;
old_i_flags = inode->i_flags;
- /* We need the capabilities to change append-only or immutable inode */
- if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
- (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
- !capable(CAP_LINUX_IMMUTABLE)) {
- ret = -EPERM;
+ simple_fill_fsxattr(&old_fa,
+ btrfs_inode_flags_to_xflags(binode->flags));
+ ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
+ if (ret)
goto out_unlock;
- }
if (fa.fsx_xflags & FS_XFLAG_SYNC)
binode->flags |= BTRFS_INODE_SYNC;
@@ -2928,8 +2920,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
inode_lock(inode);
err = btrfs_delete_subvolume(dir, dentry);
inode_unlock(inode);
- if (!err)
+ if (!err) {
+ fsnotify_rmdir(dir, dentry);
d_delete(dentry);
+ }
out_dput:
dput(dentry);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 2f078b77fe14..c1dfc97893ba 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -303,11 +303,12 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
return snprintf(buf, PAGE_SIZE, "%llu\n", val);
}
-static struct attribute *raid_attributes[] = {
+static struct attribute *raid_attrs[] = {
BTRFS_ATTR_PTR(raid, total_bytes),
BTRFS_ATTR_PTR(raid, used_bytes),
NULL
};
+ATTRIBUTE_GROUPS(raid);
static void release_raid_kobj(struct kobject *kobj)
{
@@ -317,7 +318,7 @@ static void release_raid_kobj(struct kobject *kobj)
struct kobj_type btrfs_raid_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = release_raid_kobj,
- .default_attrs = raid_attributes,
+ .default_groups = raid_groups,
};
#define SPACE_INFO_ATTR(field) \
@@ -364,6 +365,7 @@ static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(space_info, total_bytes_pinned),
NULL,
};
+ATTRIBUTE_GROUPS(space_info);
static void space_info_release(struct kobject *kobj)
{
@@ -375,7 +377,7 @@ static void space_info_release(struct kobject *kobj)
struct kobj_type space_info_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = space_info_release,
- .default_attrs = space_info_attrs,
+ .default_groups = space_info_groups,
};
static const struct attribute *allocation_attrs[] = {
@@ -910,12 +912,10 @@ void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
}
-static int btrfs_init_debugfs(void)
+static void btrfs_init_debugfs(void)
{
#ifdef CONFIG_DEBUG_FS
btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL);
- if (!btrfs_debugfs_root_dentry)
- return -ENOMEM;
/*
* Example code, how to export data through debugfs.
@@ -929,7 +929,6 @@ static int btrfs_init_debugfs(void)
#endif
#endif
- return 0;
}
int __init btrfs_init_sysfs(void)
@@ -940,9 +939,7 @@ int __init btrfs_init_sysfs(void)
if (!btrfs_kset)
return -ENOMEM;
- ret = btrfs_init_debugfs();
- if (ret)
- goto out1;
+ btrfs_init_debugfs();
init_feature_attrs();
ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
@@ -959,7 +956,6 @@ out_remove_group:
sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
out2:
debugfs_remove_recursive(btrfs_debugfs_root_dentry);
-out1:
kset_unregister(btrfs_kset);
return ret;
diff --git a/fs/buffer.c b/fs/buffer.c
index 40547bbbea94..86a38b979323 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2086,38 +2086,6 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(block_write_begin);
-void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page)
-{
- loff_t old_size = inode->i_size;
- bool i_size_changed = false;
-
- /*
- * No need to use i_size_read() here, the i_size cannot change under us
- * because we hold i_rwsem.
- *
- * But it's important to update i_size while still holding page lock:
- * page writeout could otherwise come in and zero beyond i_size.
- */
- if (pos + copied > inode->i_size) {
- i_size_write(inode, pos + copied);
- i_size_changed = true;
- }
-
- unlock_page(page);
-
- if (old_size < pos)
- pagecache_isize_extended(inode, old_size, pos);
- /*
- * Don't mark the inode dirty under page lock. First, it unnecessarily
- * makes the holding time of page lock longer. Second, it forces lock
- * ordering of page lock and transaction start for journaling
- * filesystems.
- */
- if (i_size_changed)
- mark_inode_dirty(inode);
-}
-
int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
@@ -2158,9 +2126,37 @@ int generic_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
+ struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
+ bool i_size_changed = false;
+
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
- __generic_write_end(mapping->host, pos, copied, page);
+
+ /*
+ * No need to use i_size_read() here, the i_size cannot change under us
+ * because we hold i_rwsem.
+ *
+ * But it's important to update i_size while still holding page lock:
+ * page writeout could otherwise come in and zero beyond i_size.
+ */
+ if (pos + copied > inode->i_size) {
+ i_size_write(inode, pos + copied);
+ i_size_changed = true;
+ }
+
+ unlock_page(page);
put_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+ /*
+ * Don't mark the inode dirty under page lock. First, it unnecessarily
+ * makes the holding time of page lock longer. Second, it forces lock
+ * ordering of page lock and transaction start for journaling
+ * filesystems.
+ */
+ if (i_size_changed)
+ mark_inode_dirty(inode);
return copied;
}
EXPORT_SYMBOL(generic_write_end);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index b3fc5fe26a1a..83cd41fa2b01 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -245,21 +245,17 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
debugfs_remove(fsc->debugfs_mdsc);
}
-int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
+void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
char name[100];
- int err = -ENOMEM;
dout("ceph_fs_debugfs_init\n");
- BUG_ON(!fsc->client->debugfs_dir);
fsc->debugfs_congestion_kb =
debugfs_create_file("writeback_congestion_kb",
0600,
fsc->client->debugfs_dir,
fsc,
&congestion_kb_fops);
- if (!fsc->debugfs_congestion_kb)
- goto out;
snprintf(name, sizeof(name), "../../bdi/%s",
dev_name(fsc->sb->s_bdi->dev));
@@ -267,52 +263,36 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
debugfs_create_symlink("bdi",
fsc->client->debugfs_dir,
name);
- if (!fsc->debugfs_bdi)
- goto out;
fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
0400,
fsc->client->debugfs_dir,
fsc,
&mdsmap_show_fops);
- if (!fsc->debugfs_mdsmap)
- goto out;
fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions",
0400,
fsc->client->debugfs_dir,
fsc,
&mds_sessions_show_fops);
- if (!fsc->debugfs_mds_sessions)
- goto out;
fsc->debugfs_mdsc = debugfs_create_file("mdsc",
0400,
fsc->client->debugfs_dir,
fsc,
&mdsc_show_fops);
- if (!fsc->debugfs_mdsc)
- goto out;
fsc->debugfs_caps = debugfs_create_file("caps",
0400,
fsc->client->debugfs_dir,
fsc,
&caps_show_fops);
- if (!fsc->debugfs_caps)
- goto out;
-
- return 0;
-
-out:
- ceph_fs_debugfs_cleanup(fsc);
- return err;
}
#else /* CONFIG_DEBUG_FS */
-int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
+void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
{
return 0;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 183c37c0a8fc..c5517ffeb11c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1889,9 +1889,9 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode,
return 0;
}
-static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
- struct file *dst_file, loff_t dst_off,
- size_t len, unsigned int flags)
+static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
{
struct inode *src_inode = file_inode(src_file);
struct inode *dst_inode = file_inode(dst_file);
@@ -1909,6 +1909,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
if (src_inode == dst_inode)
return -EINVAL;
+ if (src_inode->i_sb != dst_inode->i_sb)
+ return -EXDEV;
if (ceph_snap(dst_inode) != CEPH_NOSNAP)
return -EROFS;
@@ -2100,6 +2102,21 @@ out:
return ret;
}
+static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len, flags);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len, flags);
+ return ret;
+}
+
const struct file_operations ceph_file_fops = {
.open = ceph_open,
.release = ceph_release,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d57fa60dcd43..ed1b65a6c2c3 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -937,9 +937,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
dout("mount opening path %s\n", path);
}
- err = ceph_fs_debugfs_init(fsc);
- if (err < 0)
- goto out;
+ ceph_fs_debugfs_init(fsc);
root = open_root_dentry(fsc, path, started);
if (IS_ERR(root)) {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 5f27e1f7f2d6..fbe6869a3f95 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1102,7 +1102,7 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
int num_fcntl_locks, int num_flock_locks);
/* debugfs.c */
-extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
+extern void ceph_fs_debugfs_init(struct ceph_fs_client *client);
extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
/* quota.c */
diff --git a/fs/char_dev.c b/fs/char_dev.c
index d18cad28c1c3..00dfe17871ac 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -98,7 +98,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
int minorct, const char *name)
{
struct char_device_struct *cd, *curr, *prev = NULL;
- int ret = -EBUSY;
+ int ret;
int i;
if (major >= CHRDEV_MAJOR_MAX) {
@@ -129,6 +129,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
major = ret;
}
+ ret = -EBUSY;
i = major_to_index(major);
for (curr = chrdevs[i]; curr; prev = curr, curr = curr->next) {
if (curr->major < major)
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index d1b439ad0f1a..7f01c6e60791 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -32,25 +32,6 @@
#include "cifsproto.h"
static const struct cred *spnego_cred;
-static struct key_acl cifs_spnego_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
-static struct key_acl cifs_spnego_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_CLEAR),
- }
-};
-
/* create a new cifs key */
static int
cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
@@ -189,8 +170,7 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
cifs_dbg(FYI, "key description = %s\n", description);
saved_cred = override_creds(spnego_cred);
- spnego_key = request_key(&cifs_spnego_key_type, description, "",
- &cifs_spnego_key_acl);
+ spnego_key = request_key(&cifs_spnego_key_type, description, "");
revert_creds(saved_cred);
#ifdef CONFIG_CIFS_DEBUG2
@@ -227,7 +207,8 @@ init_cifs_spnego(void)
keyring = keyring_alloc(".cifs_spnego",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &cifs_spnego_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 78eed72f3af0..1d377b7f2860 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -33,25 +33,6 @@
#include "cifsproto.h"
#include "cifs_debug.h"
-static struct key_acl cifs_idmap_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
-static struct key_acl cifs_idmap_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
- }
-};
-
/* security id for everyone/world system group */
static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -317,8 +298,7 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid)
rc = 0;
saved_cred = override_creds(root_cred);
- sidkey = request_key(&cifs_idmap_key_type, desc, "",
- &cifs_idmap_key_acl);
+ sidkey = request_key(&cifs_idmap_key_type, desc, "");
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cifs_dbg(FYI, "%s: Can't map %cid %u to a SID\n",
@@ -423,8 +403,7 @@ try_upcall_to_get_id:
return -ENOMEM;
saved_cred = override_creds(root_cred);
- sidkey = request_key(&cifs_idmap_key_type, sidstr, "",
- &cifs_idmap_key_acl);
+ sidkey = request_key(&cifs_idmap_key_type, sidstr, "");
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n",
@@ -502,7 +481,8 @@ init_cifs_idmap(void)
keyring = keyring_alloc(".cifs_idmap",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &cifs_idmap_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 72db1c89bf5a..24635b65effa 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1149,6 +1149,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
len, flags);
free_xid(xid);
+
+ if (rc == -EOPNOTSUPP || rc == -EXDEV)
+ rc = generic_copy_file_range(src_file, off, dst_file,
+ destoff, len, flags);
return rc;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ae6bae2ecb5d..714a359c7c8d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2992,7 +2992,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
}
cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
- key = request_key(&key_type_logon, desc, "", NULL);
+ key = request_key(&key_type_logon, desc, "");
if (IS_ERR(key)) {
if (!ses->domainName) {
cifs_dbg(FYI, "domainName is NULL\n");
@@ -3003,7 +3003,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
/* didn't work, try to find a domain key */
sprintf(desc, "cifs:d:%s", ses->domainName);
cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
- key = request_key(&key_type_logon, desc, "", NULL);
+ key = request_key(&key_type_logon, desc, "");
if (IS_ERR(key)) {
rc = PTR_ERR(key);
goto out_err;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index d2ca5287762d..92112915de8e 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -13,6 +13,7 @@
#undef DEBUG
#include <linux/fs.h>
+#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -1788,6 +1789,7 @@ void configfs_unregister_group(struct config_group *group)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
+ fsnotify_rmdir(d_inode(parent), dentry);
d_delete(dentry);
inode_unlock(d_inode(parent));
@@ -1916,6 +1918,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
+ fsnotify_rmdir(d_inode(root), dentry);
inode_unlock(d_inode(dentry));
d_delete(dentry);
diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
index 24ed99e2eca0..5fdf24877c17 100644
--- a/fs/crypto/Kconfig
+++ b/fs/crypto/Kconfig
@@ -7,7 +7,6 @@ config FS_ENCRYPTION
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
- select CRYPTO_SHA256
select KEYS
help
Enable encryption of files and directories. This
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index b46021ebde85..82da2510721f 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -33,9 +33,8 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done)
bio_for_each_segment_all(bv, bio, iter_all) {
struct page *page = bv->bv_page;
- int ret = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
-
+ int ret = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len,
+ bv->bv_offset);
if (ret)
SetPageError(page);
else if (done)
@@ -53,9 +52,8 @@ EXPORT_SYMBOL(fscrypt_decrypt_bio);
static void completion_pages(struct work_struct *work)
{
- struct fscrypt_ctx *ctx =
- container_of(work, struct fscrypt_ctx, r.work);
- struct bio *bio = ctx->r.bio;
+ struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work);
+ struct bio *bio = ctx->bio;
__fscrypt_decrypt_bio(bio, true);
fscrypt_release_ctx(ctx);
@@ -64,57 +62,29 @@ static void completion_pages(struct work_struct *work)
void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio)
{
- INIT_WORK(&ctx->r.work, completion_pages);
- ctx->r.bio = bio;
- fscrypt_enqueue_decrypt_work(&ctx->r.work);
+ INIT_WORK(&ctx->work, completion_pages);
+ ctx->bio = bio;
+ fscrypt_enqueue_decrypt_work(&ctx->work);
}
EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio);
-void fscrypt_pullback_bio_page(struct page **page, bool restore)
-{
- struct fscrypt_ctx *ctx;
- struct page *bounce_page;
-
- /* The bounce data pages are unmapped. */
- if ((*page)->mapping)
- return;
-
- /* The bounce data page is unmapped. */
- bounce_page = *page;
- ctx = (struct fscrypt_ctx *)page_private(bounce_page);
-
- /* restore control page */
- *page = ctx->w.control_page;
-
- if (restore)
- fscrypt_restore_control_page(bounce_page);
-}
-EXPORT_SYMBOL(fscrypt_pullback_bio_page);
-
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
- struct fscrypt_ctx *ctx;
- struct page *ciphertext_page = NULL;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ struct page *ciphertext_page;
struct bio *bio;
int ret, err = 0;
- BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
-
- ctx = fscrypt_get_ctx(GFP_NOFS);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT);
- if (IS_ERR(ciphertext_page)) {
- err = PTR_ERR(ciphertext_page);
- goto errout;
- }
+ ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT);
+ if (!ciphertext_page)
+ return -ENOMEM;
while (len--) {
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk,
- ZERO_PAGE(0), ciphertext_page,
- PAGE_SIZE, 0, GFP_NOFS);
+ err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk,
+ ZERO_PAGE(0), ciphertext_page,
+ blocksize, 0, GFP_NOFS);
if (err)
goto errout;
@@ -124,14 +94,11 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
goto errout;
}
bio_set_dev(bio, inode->i_sb->s_bdev);
- bio->bi_iter.bi_sector =
- pblk << (inode->i_sb->s_blocksize_bits - 9);
+ bio->bi_iter.bi_sector = pblk << (blockbits - 9);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- ret = bio_add_page(bio, ciphertext_page,
- inode->i_sb->s_blocksize, 0);
- if (ret != inode->i_sb->s_blocksize) {
+ ret = bio_add_page(bio, ciphertext_page, blocksize, 0);
+ if (WARN_ON(ret != blocksize)) {
/* should never happen! */
- WARN_ON(1);
bio_put(bio);
err = -EIO;
goto errout;
@@ -147,7 +114,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
}
err = 0;
errout:
- fscrypt_release_ctx(ctx);
+ fscrypt_free_bounce_page(ciphertext_page);
return err;
}
EXPORT_SYMBOL(fscrypt_zeroout_range);
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 335a362ee446..45c3d0427fb2 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -59,23 +59,16 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work)
EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work);
/**
- * fscrypt_release_ctx() - Releases an encryption context
- * @ctx: The encryption context to release.
+ * fscrypt_release_ctx() - Release a decryption context
+ * @ctx: The decryption context to release.
*
- * If the encryption context was allocated from the pre-allocated pool, returns
- * it to that pool. Else, frees it.
- *
- * If there's a bounce page in the context, this frees that.
+ * If the decryption context was allocated from the pre-allocated pool, return
+ * it to that pool. Else, free it.
*/
void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
{
unsigned long flags;
- if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) {
- mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool);
- ctx->w.bounce_page = NULL;
- }
- ctx->w.control_page = NULL;
if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
kmem_cache_free(fscrypt_ctx_cachep, ctx);
} else {
@@ -87,12 +80,12 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
EXPORT_SYMBOL(fscrypt_release_ctx);
/**
- * fscrypt_get_ctx() - Gets an encryption context
+ * fscrypt_get_ctx() - Get a decryption context
* @gfp_flags: The gfp flag for memory allocation
*
- * Allocates and initializes an encryption context.
+ * Allocate and initialize a decryption context.
*
- * Return: A new encryption context on success; an ERR_PTR() otherwise.
+ * Return: A new decryption context on success; an ERR_PTR() otherwise.
*/
struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
{
@@ -100,14 +93,8 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
unsigned long flags;
/*
- * We first try getting the ctx from a free list because in
- * the common case the ctx will have an allocated and
- * initialized crypto tfm, so it's probably a worthwhile
- * optimization. For the bounce page, we first try getting it
- * from the kernel allocator because that's just about as fast
- * as getting it from a list and because a cache of free pages
- * should generally be a "last resort" option for a filesystem
- * to be able to do its job.
+ * First try getting a ctx from the free list so that we don't have to
+ * call into the slab allocator.
*/
spin_lock_irqsave(&fscrypt_ctx_lock, flags);
ctx = list_first_entry_or_null(&fscrypt_free_ctxs,
@@ -123,11 +110,31 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
} else {
ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
}
- ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL;
return ctx;
}
EXPORT_SYMBOL(fscrypt_get_ctx);
+struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags)
+{
+ return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
+}
+
+/**
+ * fscrypt_free_bounce_page() - free a ciphertext bounce page
+ *
+ * Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(),
+ * or by fscrypt_alloc_bounce_page() directly.
+ */
+void fscrypt_free_bounce_page(struct page *bounce_page)
+{
+ if (!bounce_page)
+ return;
+ set_page_private(bounce_page, (unsigned long)NULL);
+ ClearPagePrivate(bounce_page);
+ mempool_free(bounce_page, fscrypt_bounce_page_pool);
+}
+EXPORT_SYMBOL(fscrypt_free_bounce_page);
+
void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
const struct fscrypt_info *ci)
{
@@ -141,10 +148,11 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw);
}
-int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
- u64 lblk_num, struct page *src_page,
- struct page *dest_page, unsigned int len,
- unsigned int offs, gfp_t gfp_flags)
+/* Encrypt or decrypt a single filesystem block of file contents */
+int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw,
+ u64 lblk_num, struct page *src_page,
+ struct page *dest_page, unsigned int len,
+ unsigned int offs, gfp_t gfp_flags)
{
union fscrypt_iv iv;
struct skcipher_request *req = NULL;
@@ -154,7 +162,10 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
- BUG_ON(len == 0);
+ if (WARN_ON_ONCE(len <= 0))
+ return -EINVAL;
+ if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0))
+ return -EINVAL;
fscrypt_generate_iv(&iv, lblk_num, ci);
@@ -186,126 +197,158 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
return 0;
}
-struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
- gfp_t gfp_flags)
-{
- ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
- if (ctx->w.bounce_page == NULL)
- return ERR_PTR(-ENOMEM);
- ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL;
- return ctx->w.bounce_page;
-}
-
/**
- * fscypt_encrypt_page() - Encrypts a page
- * @inode: The inode for which the encryption should take place
- * @page: The page to encrypt. Must be locked for bounce-page
- * encryption.
- * @len: Length of data to encrypt in @page and encrypted
- * data in returned page.
- * @offs: Offset of data within @page and returned
- * page holding encrypted data.
- * @lblk_num: Logical block number. This must be unique for multiple
- * calls with same inode, except when overwriting
- * previously written data.
- * @gfp_flags: The gfp flag for memory allocation
- *
- * Encrypts @page using the ctx encryption context. Performs encryption
- * either in-place or into a newly allocated bounce page.
- * Called on the page write path.
+ * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a pagecache page
+ * @page: The locked pagecache page containing the block(s) to encrypt
+ * @len: Total size of the block(s) to encrypt. Must be a nonzero
+ * multiple of the filesystem's block size.
+ * @offs: Byte offset within @page of the first block to encrypt. Must be
+ * a multiple of the filesystem's block size.
+ * @gfp_flags: Memory allocation flags
*
- * Bounce page allocation is the default.
- * In this case, the contents of @page are encrypted and stored in an
- * allocated bounce page. @page has to be locked and the caller must call
- * fscrypt_restore_control_page() on the returned ciphertext page to
- * release the bounce buffer and the encryption context.
+ * A new bounce page is allocated, and the specified block(s) are encrypted into
+ * it. In the bounce page, the ciphertext block(s) will be located at the same
+ * offsets at which the plaintext block(s) were located in the source page; any
+ * other parts of the bounce page will be left uninitialized. However, normally
+ * blocksize == PAGE_SIZE and the whole page is encrypted at once.
*
- * In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in
- * fscrypt_operations. Here, the input-page is returned with its content
- * encrypted.
+ * This is for use by the filesystem's ->writepages() method.
*
- * Return: A page with the encrypted content on success. Else, an
- * error value or NULL.
+ * Return: the new encrypted bounce page on success; an ERR_PTR() on failure
*/
-struct page *fscrypt_encrypt_page(const struct inode *inode,
- struct page *page,
- unsigned int len,
- unsigned int offs,
- u64 lblk_num, gfp_t gfp_flags)
+struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
+ unsigned int len,
+ unsigned int offs,
+ gfp_t gfp_flags)
{
- struct fscrypt_ctx *ctx;
- struct page *ciphertext_page = page;
+ const struct inode *inode = page->mapping->host;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ struct page *ciphertext_page;
+ u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
+ (offs >> blockbits);
+ unsigned int i;
int err;
- BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0);
+ if (WARN_ON_ONCE(!PageLocked(page)))
+ return ERR_PTR(-EINVAL);
- if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) {
- /* with inplace-encryption we just encrypt the page */
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page,
- ciphertext_page, len, offs,
- gfp_flags);
- if (err)
- return ERR_PTR(err);
+ if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
+ return ERR_PTR(-EINVAL);
- return ciphertext_page;
- }
-
- BUG_ON(!PageLocked(page));
-
- ctx = fscrypt_get_ctx(gfp_flags);
- if (IS_ERR(ctx))
- return ERR_CAST(ctx);
-
- /* The encryption operation will require a bounce page. */
- ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags);
- if (IS_ERR(ciphertext_page))
- goto errout;
+ ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags);
+ if (!ciphertext_page)
+ return ERR_PTR(-ENOMEM);
- ctx->w.control_page = page;
- err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num,
- page, ciphertext_page, len, offs,
- gfp_flags);
- if (err) {
- ciphertext_page = ERR_PTR(err);
- goto errout;
+ for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
+ err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num,
+ page, ciphertext_page,
+ blocksize, i, gfp_flags);
+ if (err) {
+ fscrypt_free_bounce_page(ciphertext_page);
+ return ERR_PTR(err);
+ }
}
SetPagePrivate(ciphertext_page);
- set_page_private(ciphertext_page, (unsigned long)ctx);
- lock_page(ciphertext_page);
+ set_page_private(ciphertext_page, (unsigned long)page);
return ciphertext_page;
+}
+EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks);
-errout:
- fscrypt_release_ctx(ctx);
- return ciphertext_page;
+/**
+ * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place
+ * @inode: The inode to which this block belongs
+ * @page: The page containing the block to encrypt
+ * @len: Size of block to encrypt. Doesn't need to be a multiple of the
+ * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
+ * @offs: Byte offset within @page at which the block to encrypt begins
+ * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based
+ * number of the block within the file
+ * @gfp_flags: Memory allocation flags
+ *
+ * Encrypt a possibly-compressed filesystem block that is located in an
+ * arbitrary page, not necessarily in the original pagecache page. The @inode
+ * and @lblk_num must be specified, as they can't be determined from @page.
+ *
+ * Return: 0 on success; -errno on failure
+ */
+int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offs,
+ u64 lblk_num, gfp_t gfp_flags)
+{
+ return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page,
+ len, offs, gfp_flags);
}
-EXPORT_SYMBOL(fscrypt_encrypt_page);
+EXPORT_SYMBOL(fscrypt_encrypt_block_inplace);
/**
- * fscrypt_decrypt_page() - Decrypts a page in-place
- * @inode: The corresponding inode for the page to decrypt.
- * @page: The page to decrypt. Must be locked in case
- * it is a writeback page (FS_CFLG_OWN_PAGES unset).
- * @len: Number of bytes in @page to be decrypted.
- * @offs: Start of data in @page.
- * @lblk_num: Logical block number.
+ * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a pagecache page
+ * @page: The locked pagecache page containing the block(s) to decrypt
+ * @len: Total size of the block(s) to decrypt. Must be a nonzero
+ * multiple of the filesystem's block size.
+ * @offs: Byte offset within @page of the first block to decrypt. Must be
+ * a multiple of the filesystem's block size.
*
- * Decrypts page in-place using the ctx encryption context.
+ * The specified block(s) are decrypted in-place within the pagecache page,
+ * which must still be locked and not uptodate. Normally, blocksize ==
+ * PAGE_SIZE and the whole page is decrypted at once.
*
- * Called from the read completion callback.
+ * This is for use by the filesystem's ->readpages() method.
*
- * Return: Zero on success, non-zero otherwise.
+ * Return: 0 on success; -errno on failure
*/
-int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
- unsigned int len, unsigned int offs, u64 lblk_num)
+int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len,
+ unsigned int offs)
{
- if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES))
- BUG_ON(!PageLocked(page));
+ const struct inode *inode = page->mapping->host;
+ const unsigned int blockbits = inode->i_blkbits;
+ const unsigned int blocksize = 1 << blockbits;
+ u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) +
+ (offs >> blockbits);
+ unsigned int i;
+ int err;
+
+ if (WARN_ON_ONCE(!PageLocked(page)))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize)))
+ return -EINVAL;
+
+ for (i = offs; i < offs + len; i += blocksize, lblk_num++) {
+ err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page,
+ page, blocksize, i, GFP_NOFS);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks);
- return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page,
- len, offs, GFP_NOFS);
+/**
+ * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place
+ * @inode: The inode to which this block belongs
+ * @page: The page containing the block to decrypt
+ * @len: Size of block to decrypt. Doesn't need to be a multiple of the
+ * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE.
+ * @offs: Byte offset within @page at which the block to decrypt begins
+ * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based
+ * number of the block within the file
+ *
+ * Decrypt a possibly-compressed filesystem block that is located in an
+ * arbitrary page, not necessarily in the original pagecache page. The @inode
+ * and @lblk_num must be specified, as they can't be determined from @page.
+ *
+ * Return: 0 on success; -errno on failure
+ */
+int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page,
+ unsigned int len, unsigned int offs,
+ u64 lblk_num)
+{
+ return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page,
+ len, offs, GFP_NOFS);
}
-EXPORT_SYMBOL(fscrypt_decrypt_page);
+EXPORT_SYMBOL(fscrypt_decrypt_block_inplace);
/*
* Validate dentries in encrypted directories to make sure we aren't potentially
@@ -355,18 +398,6 @@ const struct dentry_operations fscrypt_d_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
-void fscrypt_restore_control_page(struct page *page)
-{
- struct fscrypt_ctx *ctx;
-
- ctx = (struct fscrypt_ctx *)page_private(page);
- set_page_private(page, (unsigned long)NULL);
- ClearPagePrivate(page);
- unlock_page(page);
- fscrypt_release_ctx(ctx);
-}
-EXPORT_SYMBOL(fscrypt_restore_control_page);
-
static void fscrypt_destroy(void)
{
struct fscrypt_ctx *pos, *n;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index eccea3d8f923..00d150ff3033 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -12,7 +12,6 @@
*/
#include <linux/scatterlist.h>
-#include <linux/ratelimit.h>
#include <crypto/skcipher.h>
#include "fscrypt_private.h"
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 7da276159593..8978eec9d766 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -94,7 +94,6 @@ typedef enum {
} fscrypt_direction_t;
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
-#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
u32 filenames_mode)
@@ -117,14 +116,12 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
extern int fscrypt_initialize(unsigned int cop_flags);
-extern int fscrypt_do_page_crypto(const struct inode *inode,
- fscrypt_direction_t rw, u64 lblk_num,
- struct page *src_page,
- struct page *dest_page,
- unsigned int len, unsigned int offs,
- gfp_t gfp_flags);
-extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
- gfp_t gfp_flags);
+extern int fscrypt_crypt_block(const struct inode *inode,
+ fscrypt_direction_t rw, u64 lblk_num,
+ struct page *src_page, struct page *dest_page,
+ unsigned int len, unsigned int offs,
+ gfp_t gfp_flags);
+extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags);
extern const struct dentry_operations fscrypt_d_ops;
extern void __printf(3, 4) __cold
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index bd525f7573a4..c1d6715d88e9 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -5,7 +5,6 @@
* Encryption hooks for higher-level filesystem operations.
*/
-#include <linux/ratelimit.h>
#include "fscrypt_private.h"
/**
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 4f85af8ab239..207ebed918c1 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -12,7 +12,6 @@
#include <keys/user-type.h>
#include <linux/hashtable.h>
#include <linux/scatterlist.h>
-#include <linux/ratelimit.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
@@ -92,7 +91,7 @@ find_and_lock_process_key(const char *prefix,
if (!description)
return ERR_PTR(-ENOMEM);
- key = request_key(&key_type_logon, description, NULL, NULL);
+ key = request_key(&key_type_logon, description, NULL);
kfree(description);
if (IS_ERR(key))
return key;
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index d536889ac31b..4941fe8471ce 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -81,6 +81,8 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
if (ret == -ENODATA) {
if (!S_ISDIR(inode->i_mode))
ret = -ENOTDIR;
+ else if (IS_DEADDIR(inode))
+ ret = -ENOENT;
else if (!inode->i_sb->s_cop->empty_dir(inode))
ret = -ENOTEMPTY;
else
diff --git a/fs/dcache.c b/fs/dcache.c
index c435398f2c81..f41121e5d1ec 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2372,7 +2372,6 @@ EXPORT_SYMBOL(d_hash_and_lookup);
void d_delete(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
- int isdir = d_is_dir(dentry);
spin_lock(&inode->i_lock);
spin_lock(&dentry->d_lock);
@@ -2387,7 +2386,6 @@ void d_delete(struct dentry * dentry)
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
}
- fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index ddd708b09fa1..93e4ca6b2ad7 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -997,25 +997,19 @@ static const struct file_operations u32_array_fops = {
* @array as data. If the @mode variable is so set it can be read from.
* Writing is not supported. Seek within the file is also not supported.
* Once array is created its size can not be changed.
- *
- * The function returns a pointer to dentry on success. If an error occurs,
- * %ERR_PTR(-ERROR) or NULL will be returned. If debugfs is not enabled in
- * the kernel, the value %ERR_PTR(-ENODEV) will be returned.
*/
-struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
- struct dentry *parent,
- u32 *array, u32 elements)
+void debugfs_create_u32_array(const char *name, umode_t mode,
+ struct dentry *parent, u32 *array, u32 elements)
{
struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
- return NULL;
+ return;
data->array = array;
data->elements = elements;
- return debugfs_create_file_unsafe(name, mode, parent, data,
- &u32_array_fops);
+ debugfs_create_file_unsafe(name, mode, parent, data, &u32_array_fops);
}
EXPORT_SYMBOL_GPL(debugfs_create_u32_array);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index acef14ad53db..042b688ed124 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -2,13 +2,16 @@
/*
* inode.c - part of debugfs, a tiny little debug file system
*
- * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2004,2019 Greg Kroah-Hartman <greg@kroah.com>
* Copyright (C) 2004 IBM Inc.
+ * Copyright (C) 2019 Linux Foundation <gregkh@linuxfoundation.org>
*
* debugfs is for people to use instead of /proc or /sys.
* See ./Documentation/core-api/kernel-api.rst for more details.
*/
+#define pr_fmt(fmt) "debugfs: " fmt
+
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
@@ -285,15 +288,17 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
struct dentry *dentry;
int error;
- pr_debug("debugfs: creating file '%s'\n",name);
+ pr_debug("creating file '%s'\n", name);
if (IS_ERR(parent))
return parent;
error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
&debugfs_mount_count);
- if (error)
+ if (error) {
+ pr_err("Unable to pin filesystem for file '%s'\n", name);
return ERR_PTR(error);
+ }
/* If the parent is not specified, we create it in the root.
* We need the root dentry to do this, which is in the super
@@ -306,6 +311,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
inode_lock(d_inode(parent));
dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
+ if (d_is_dir(dentry))
+ pr_err("Directory '%s' with parent '%s' already present!\n",
+ name, parent->d_name.name);
+ else
+ pr_err("File '%s' in directory '%s' already present!\n",
+ name, parent->d_name.name);
dput(dentry);
dentry = ERR_PTR(-EEXIST);
}
@@ -349,8 +360,11 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
return dentry;
inode = debugfs_get_inode(dentry->d_sb);
- if (unlikely(!inode))
+ if (unlikely(!inode)) {
+ pr_err("out of free dentries, can not create file '%s'\n",
+ name);
return failed_creating(dentry);
+ }
inode->i_mode = mode;
inode->i_private = data;
@@ -511,8 +525,11 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
return dentry;
inode = debugfs_get_inode(dentry->d_sb);
- if (unlikely(!inode))
+ if (unlikely(!inode)) {
+ pr_err("out of free dentries, can not create directory '%s'\n",
+ name);
return failed_creating(dentry);
+ }
inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
inode->i_op = &simple_dir_inode_operations;
@@ -550,8 +567,11 @@ struct dentry *debugfs_create_automount(const char *name,
return dentry;
inode = debugfs_get_inode(dentry->d_sb);
- if (unlikely(!inode))
+ if (unlikely(!inode)) {
+ pr_err("out of free dentries, can not create automount '%s'\n",
+ name);
return failed_creating(dentry);
+ }
make_empty_dir_inode(inode);
inode->i_flags |= S_AUTOMOUNT;
@@ -606,6 +626,8 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
inode = debugfs_get_inode(dentry->d_sb);
if (unlikely(!inode)) {
+ pr_err("out of free dentries, can not create symlink '%s'\n",
+ name);
kfree(link);
return failed_creating(dentry);
}
@@ -617,13 +639,10 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
}
EXPORT_SYMBOL_GPL(debugfs_create_symlink);
-static void __debugfs_remove_file(struct dentry *dentry, struct dentry *parent)
+static void __debugfs_file_removed(struct dentry *dentry)
{
struct debugfs_fsdata *fsd;
- simple_unlink(d_inode(parent), dentry);
- d_delete(dentry);
-
/*
* Paired with the closing smp_mb() implied by a successful
* cmpxchg() in debugfs_file_get(): either
@@ -644,16 +663,18 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
if (simple_positive(dentry)) {
dget(dentry);
- if (!d_is_reg(dentry)) {
- if (d_is_dir(dentry))
- ret = simple_rmdir(d_inode(parent), dentry);
- else
- simple_unlink(d_inode(parent), dentry);
+ if (d_is_dir(dentry)) {
+ ret = simple_rmdir(d_inode(parent), dentry);
if (!ret)
- d_delete(dentry);
+ fsnotify_rmdir(d_inode(parent), dentry);
} else {
- __debugfs_remove_file(dentry, parent);
+ simple_unlink(d_inode(parent), dentry);
+ fsnotify_unlink(d_inode(parent), dentry);
}
+ if (!ret)
+ d_delete(dentry);
+ if (d_is_reg(dentry))
+ __debugfs_file_removed(dentry);
dput(dentry);
}
return ret;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 2c14ae044dce..beeadca23b05 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -621,6 +621,7 @@ void devpts_pty_kill(struct dentry *dentry)
dentry->d_fsdata = NULL;
drop_nlink(dentry->d_inode);
+ fsnotify_unlink(d_inode(dentry->d_parent), dentry);
d_delete(dentry);
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
}
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 0e941d42e3e9..d6bbccb0ed15 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -737,7 +737,7 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
debugfs_remove(ls->ls_debug_toss_dentry);
}
-int dlm_create_debug_file(struct dlm_ls *ls)
+void dlm_create_debug_file(struct dlm_ls *ls)
{
char name[DLM_LOCKSPACE_LEN + 8];
@@ -748,8 +748,6 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&format1_fops);
- if (!ls->ls_debug_rsb_dentry)
- goto fail;
/* format 2 */
@@ -761,8 +759,6 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&format2_fops);
- if (!ls->ls_debug_locks_dentry)
- goto fail;
/* format 3 */
@@ -774,8 +770,6 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&format3_fops);
- if (!ls->ls_debug_all_dentry)
- goto fail;
/* format 4 */
@@ -787,8 +781,6 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&format4_fops);
- if (!ls->ls_debug_toss_dentry)
- goto fail;
memset(name, 0, sizeof(name));
snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name);
@@ -798,21 +790,12 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&waiters_fops);
- if (!ls->ls_debug_waiters_dentry)
- goto fail;
-
- return 0;
-
- fail:
- dlm_delete_debug_file(ls);
- return -ENOMEM;
}
-int __init dlm_register_debugfs(void)
+void __init dlm_register_debugfs(void)
{
mutex_init(&debug_buf_lock);
dlm_root = debugfs_create_dir("dlm", NULL);
- return dlm_root ? 0 : -ENOMEM;
}
void dlm_unregister_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index da1173a0b274..416d9de35679 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -719,14 +719,14 @@ int dlm_plock_init(void);
void dlm_plock_exit(void);
#ifdef CONFIG_DLM_DEBUG
-int dlm_register_debugfs(void);
+void dlm_register_debugfs(void);
void dlm_unregister_debugfs(void);
-int dlm_create_debug_file(struct dlm_ls *ls);
+void dlm_create_debug_file(struct dlm_ls *ls);
void dlm_delete_debug_file(struct dlm_ls *ls);
#else
-static inline int dlm_register_debugfs(void) { return 0; }
+static inline void dlm_register_debugfs(void) { }
static inline void dlm_unregister_debugfs(void) { }
-static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; }
+static inline void dlm_create_debug_file(struct dlm_ls *ls) { }
static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
#endif
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 4c2c85a223ac..afb8340918b8 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -158,6 +158,7 @@ static struct attribute *dlm_attrs[] = {
&dlm_attr_recover_nodeid.attr,
NULL,
};
+ATTRIBUTE_GROUPS(dlm);
static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
@@ -187,7 +188,7 @@ static const struct sysfs_ops dlm_attr_ops = {
};
static struct kobj_type dlm_ktype = {
- .default_attrs = dlm_attrs,
+ .default_groups = dlm_groups,
.sysfs_ops = &dlm_attr_ops,
.release = lockspace_kobj_release,
};
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 114ebfe30929..3951d39b9b75 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1628,8 +1628,10 @@ static void clean_writequeues(void)
static void work_stop(void)
{
- destroy_workqueue(recv_workqueue);
- destroy_workqueue(send_workqueue);
+ if (recv_workqueue)
+ destroy_workqueue(recv_workqueue);
+ if (send_workqueue)
+ destroy_workqueue(send_workqueue);
}
static int work_start(void)
@@ -1689,13 +1691,17 @@ static void work_flush(void)
struct hlist_node *n;
struct connection *con;
- flush_workqueue(recv_workqueue);
- flush_workqueue(send_workqueue);
+ if (recv_workqueue)
+ flush_workqueue(recv_workqueue);
+ if (send_workqueue)
+ flush_workqueue(send_workqueue);
do {
ok = 1;
foreach_conn(stop_conn);
- flush_workqueue(recv_workqueue);
- flush_workqueue(send_workqueue);
+ if (recv_workqueue)
+ flush_workqueue(recv_workqueue);
+ if (send_workqueue)
+ flush_workqueue(send_workqueue);
for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
hlist_for_each_entry_safe(con, n,
&connection_hash[i], list) {
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 39579927ed84..afc66a1346d3 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -35,9 +35,7 @@ static int __init init_dlm(void)
if (error)
goto out_lockspace;
- error = dlm_register_debugfs();
- if (error)
- goto out_config;
+ dlm_register_debugfs();
error = dlm_user_init();
if (error)
@@ -61,7 +59,6 @@ static int __init init_dlm(void)
dlm_user_exit();
out_debug:
dlm_unregister_debugfs();
- out_config:
dlm_config_exit();
out_lockspace:
dlm_lockspace_exit();
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 91d65f337d87..f91db24bbf3b 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <asm/unaligned.h>
#include <linux/kernel.h>
+#include <linux/xattr.h>
#include "ecryptfs_kernel.h"
#define DECRYPT 0
@@ -860,13 +861,10 @@ static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
* @crypt_stat: The cryptographic context
* @page_virt: Source data to be parsed
* @bytes_read: Updated with the number of bytes read
- *
- * Returns zero on success; non-zero if the flag set is invalid
*/
-static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
+static void ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
char *page_virt, int *bytes_read)
{
- int rc = 0;
int i;
u32 flags;
@@ -879,7 +877,6 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
/* Version is in top 8 bits of the 32-bit flag vector */
crypt_stat->file_version = ((flags >> 24) & 0xFF);
(*bytes_read) = 4;
- return rc;
}
/**
@@ -1004,8 +1001,10 @@ int ecryptfs_read_and_validate_header_region(struct inode *inode)
rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES,
inode);
- if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
- return rc >= 0 ? -EINVAL : rc;
+ if (rc < 0)
+ return rc;
+ else if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
+ return -EINVAL;
rc = ecryptfs_validate_marker(marker);
if (!rc)
ecryptfs_i_size_init(file_size, inode);
@@ -1115,9 +1114,21 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
char *page_virt, size_t size)
{
int rc;
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+ struct inode *lower_inode = d_inode(lower_dentry);
- rc = ecryptfs_setxattr(ecryptfs_dentry, ecryptfs_inode,
- ECRYPTFS_XATTR_NAME, page_virt, size, 0);
+ if (!(lower_inode->i_opflags & IOP_XATTR)) {
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ inode_lock(lower_inode);
+ rc = __vfs_setxattr(lower_dentry, lower_inode, ECRYPTFS_XATTR_NAME,
+ page_virt, size, 0);
+ if (!rc && ecryptfs_inode)
+ fsstack_copy_attr_all(ecryptfs_inode, lower_inode);
+ inode_unlock(lower_inode);
+out:
return rc;
}
@@ -1291,12 +1302,7 @@ static int ecryptfs_read_headers_virt(char *page_virt,
if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED))
ecryptfs_i_size_init(page_virt, d_inode(ecryptfs_dentry));
offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
- rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
- &bytes_read);
- if (rc) {
- ecryptfs_printk(KERN_WARNING, "Error processing flags\n");
- goto out;
- }
+ ecryptfs_process_flags(crypt_stat, (page_virt + offset), &bytes_read);
if (crypt_stat->file_version > ECRYPTFS_SUPPORTED_FILE_VERSION) {
ecryptfs_printk(KERN_WARNING, "File version is [%d]; only "
"file version [%d] is supported by this "
@@ -1367,8 +1373,10 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry,
ecryptfs_inode_to_lower(inode),
ECRYPTFS_XATTR_NAME, file_size,
ECRYPTFS_SIZE_AND_MARKER_BYTES);
- if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
- return rc >= 0 ? -EINVAL : rc;
+ if (rc < 0)
+ return rc;
+ else if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES)
+ return -EINVAL;
rc = ecryptfs_validate_marker(marker);
if (!rc)
ecryptfs_i_size_init(file_size, inode);
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index d131d070826f..1f65e99f9a41 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -83,25 +83,9 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
*/
void ecryptfs_dump_hex(char *data, int bytes)
{
- int i = 0;
- int add_newline = 1;
-
if (ecryptfs_verbosity < 1)
return;
- if (bytes != 0) {
- printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]);
- i++;
- }
- while (i < bytes) {
- printk("0x%.2x.", (unsigned char)data[i]);
- i++;
- if (i % 16 == 0) {
- printk("\n");
- add_newline = 0;
- } else
- add_newline = 1;
- }
- if (add_newline)
- printk("\n");
-}
+ print_hex_dump(KERN_DEBUG, "ecryptfs: ", DUMP_PREFIX_OFFSET, 16, 1,
+ data, bytes, false);
+}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 67844fe41a61..1c1a56be7ea2 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -91,7 +91,7 @@ ecryptfs_get_encrypted_key_payload_data(struct key *key)
static inline struct key *ecryptfs_get_encrypted_key(char *sig)
{
- return request_key(&key_type_encrypted, sig, NULL, NULL);
+ return request_key(&key_type_encrypted, sig, NULL);
}
#else
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 1e994d780f37..18426f4855f1 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1121,7 +1121,7 @@ static int ecryptfs_xattr_set(const struct xattr_handler *handler,
}
}
-const struct xattr_handler ecryptfs_xattr_handler = {
+static const struct xattr_handler ecryptfs_xattr_handler = {
.prefix = "", /* match anything */
.get = ecryptfs_xattr_get,
.set = ecryptfs_xattr_set,
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index ba382f135918..216fbe6a4837 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1048,8 +1048,9 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
"rc = [%d]\n", __func__, rc);
goto out_free_unlock;
}
- while (s->decrypted_filename[s->i] != '\0'
- && s->i < s->block_aligned_filename_size)
+
+ while (s->i < s->block_aligned_filename_size &&
+ s->decrypted_filename[s->i] != '\0')
s->i++;
if (s->i == s->block_aligned_filename_size) {
printk(KERN_WARNING "%s: Invalid tag 70 packet; could not "
@@ -1610,10 +1611,10 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
{
int rc = 0;
- (*auth_tok_key) = request_key(&key_type_user, sig, NULL, NULL);
- if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
+ (*auth_tok_key) = request_key(&key_type_user, sig, NULL);
+ if (IS_ERR(*auth_tok_key)) {
(*auth_tok_key) = ecryptfs_get_encrypted_key(sig);
- if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
+ if (IS_ERR(*auth_tok_key)) {
printk(KERN_ERR "Could not find key with description: [%s]\n",
sig);
rc = process_request_key_err(PTR_ERR(*auth_tok_key));
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index ee3bc0c96b9d..e9e27a271af0 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -107,16 +107,22 @@ out_free:
return size;
}
-static int
-efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+static inline unsigned int efivarfs_getflags(struct inode *inode)
{
- struct inode *inode = file->f_mapping->host;
unsigned int i_flags;
unsigned int flags = 0;
i_flags = inode->i_flags;
if (i_flags & S_IMMUTABLE)
flags |= FS_IMMUTABLE_FL;
+ return flags;
+}
+
+static int
+efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+{
+ struct inode *inode = file->f_mapping->host;
+ unsigned int flags = efivarfs_getflags(inode);
if (copy_to_user(arg, &flags, sizeof(flags)))
return -EFAULT;
@@ -129,6 +135,7 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
struct inode *inode = file->f_mapping->host;
unsigned int flags;
unsigned int i_flags = 0;
+ unsigned int oldflags = efivarfs_getflags(inode);
int error;
if (!inode_owner_or_capable(inode))
@@ -140,9 +147,6 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
if (flags & ~FS_IMMUTABLE_FL)
return -EOPNOTSUPP;
- if (!capable(CAP_LINUX_IMMUTABLE))
- return -EPERM;
-
if (flags & FS_IMMUTABLE_FL)
i_flags |= S_IMMUTABLE;
@@ -152,12 +156,16 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
return error;
inode_lock(inode);
+
+ error = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (error)
+ goto out;
+
inode_set_flags(inode, i_flags, S_IMMUTABLE);
+out:
inode_unlock(inode);
-
mnt_drop_write_file(file);
-
- return 0;
+ return error;
}
static long
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 33db13365c5e..547c165299c0 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1197,7 +1197,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi)
/*
* Returns 1 if the passed-in block region is valid; 0 if some part overlaps
- * with filesystem metadata blocksi.
+ * with filesystem metadata blocks.
*/
int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
unsigned int count)
@@ -1212,7 +1212,6 @@ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk,
(start_blk + count >= sbi->s_sb_block))
return 0;
-
return 1;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index a0c5ea91fcd4..fda7d3f5b4be 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -172,9 +172,7 @@ static void ext2_preread_inode(struct inode *inode)
struct backing_dev_info *bdi;
bdi = inode_to_bdi(inode);
- if (bdi_read_congested(bdi))
- return;
- if (bdi_write_congested(bdi))
+ if (bdi_rw_congested(bdi))
return;
block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
@@ -511,6 +509,7 @@ repeat_in_this_group:
/*
* Scanned all blockgroups.
*/
+ brelse(bitmap_bh);
err = -ENOSPC;
goto fail;
got:
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e474127dd255..7004ce581a32 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1400,7 +1400,7 @@ void ext2_set_file_ops(struct inode *inode)
struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
{
struct ext2_inode_info *ei;
- struct buffer_head * bh;
+ struct buffer_head * bh = NULL;
struct ext2_inode *raw_inode;
struct inode *inode;
long ret = -EIO;
@@ -1446,7 +1446,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
*/
if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
/* this inode is deleted */
- brelse (bh);
ret = -ESTALE;
goto bad_inode;
}
@@ -1463,7 +1462,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
!ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) {
ext2_error(sb, "ext2_iget", "bad extended attribute block %u",
ei->i_file_acl);
- brelse(bh);
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -1526,6 +1524,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
return inode;
bad_inode:
+ brelse(bh);
iget_failed(inode);
return ERR_PTR(ret);
}
@@ -1640,7 +1639,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
}
int ext2_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int query_falgs)
+ u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct ext2_inode_info *ei = EXT2_I(inode);
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 0367c0039e68..1b853fb0b163 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -60,18 +60,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
oldflags = ei->i_flags;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- *
- * This test looks nicer. Thanks to Pauline Middelink
- */
- if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- inode_unlock(inode);
- ret = -EPERM;
- goto setflags_out;
- }
+ ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (ret) {
+ inode_unlock(inode);
+ goto setflags_out;
}
flags = flags & EXT2_FL_USER_MODIFIABLE;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1d7ab73b1014..44eb6e7eb492 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -303,16 +303,16 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sb, NOBH))
seq_puts(seq, ",nobh");
- if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA)
+ if (test_opt(sb, USRQUOTA))
seq_puts(seq, ",usrquota");
- if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA)
+ if (test_opt(sb, GRPQUOTA))
seq_puts(seq, ",grpquota");
- if (sbi->s_mount_opt & EXT2_MOUNT_XIP)
+ if (test_opt(sb, XIP))
seq_puts(seq, ",xip");
- if (sbi->s_mount_opt & EXT2_MOUNT_DAX)
+ if (test_opt(sb, DAX))
seq_puts(seq, ",dax");
if (!test_opt(sb, RESERVATION))
@@ -935,8 +935,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_resgid = opts.s_resgid;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
- ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
- SB_POSIXACL : 0);
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
sb->s_iflags |= SB_I_CGROUPWB;
if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
@@ -967,11 +966,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
- if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
+ if (test_opt(sb, DAX)) {
if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
ext2_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
- sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
+ clear_opt(sbi->s_mount_opt, DAX);
}
}
@@ -1404,7 +1403,7 @@ out_set:
sbi->s_resuid = new_opts.s_resuid;
sbi->s_resgid = new_opts.s_resgid;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
- ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0);
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
spin_unlock(&sbi->s_lock);
return 0;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 1e33e0ac8cf1..79369c13cc55 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -134,6 +134,53 @@ ext2_xattr_handler(int name_index)
return handler;
}
+static bool
+ext2_xattr_header_valid(struct ext2_xattr_header *header)
+{
+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
+ header->h_blocks != cpu_to_le32(1))
+ return false;
+
+ return true;
+}
+
+static bool
+ext2_xattr_entry_valid(struct ext2_xattr_entry *entry,
+ char *end, size_t end_offs)
+{
+ struct ext2_xattr_entry *next;
+ size_t size;
+
+ next = EXT2_XATTR_NEXT(entry);
+ if ((char *)next >= end)
+ return false;
+
+ if (entry->e_value_block != 0)
+ return false;
+
+ size = le32_to_cpu(entry->e_value_size);
+ if (size > end_offs ||
+ le16_to_cpu(entry->e_value_offs) + size > end_offs)
+ return false;
+
+ return true;
+}
+
+static int
+ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name,
+ struct ext2_xattr_entry *entry)
+{
+ int cmp;
+
+ cmp = name_index - entry->e_name_index;
+ if (!cmp)
+ cmp = name_len - entry->e_name_len;
+ if (!cmp)
+ cmp = memcmp(name, entry->e_name, name_len);
+
+ return cmp;
+}
+
/*
* ext2_xattr_get()
*
@@ -152,7 +199,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
struct ext2_xattr_entry *entry;
size_t name_len, size;
char *end;
- int error;
+ int error, not_found;
struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
@@ -176,9 +223,9 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
+ if (!ext2_xattr_header_valid(HDR(bh))) {
+bad_block:
+ ext2_error(inode->i_sb, "ext2_xattr_get",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
@@ -188,29 +235,25 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
/* find named attribute */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
- struct ext2_xattr_entry *next =
- EXT2_XATTR_NEXT(entry);
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(entry, end,
+ inode->i_sb->s_blocksize))
goto bad_block;
- if (name_index == entry->e_name_index &&
- name_len == entry->e_name_len &&
- memcmp(name, entry->e_name, name_len) == 0)
+
+ not_found = ext2_xattr_cmp_entry(name_index, name_len, name,
+ entry);
+ if (!not_found)
goto found;
- entry = next;
+ if (not_found < 0)
+ break;
+
+ entry = EXT2_XATTR_NEXT(entry);
}
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
found:
- /* check the buffer size */
- if (entry->e_value_block != 0)
- goto bad_block;
size = le32_to_cpu(entry->e_value_size);
- if (size > inode->i_sb->s_blocksize ||
- le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
- goto bad_block;
-
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
@@ -266,9 +309,9 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
+ if (!ext2_xattr_header_valid(HDR(bh))) {
+bad_block:
+ ext2_error(inode->i_sb, "ext2_xattr_list",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
@@ -278,11 +321,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
/* check the on-disk data structure */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry);
-
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(entry, end,
+ inode->i_sb->s_blocksize))
goto bad_block;
- entry = next;
+ entry = EXT2_XATTR_NEXT(entry);
}
if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
@@ -367,7 +409,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
struct ext2_xattr_header *header = NULL;
- struct ext2_xattr_entry *here, *last;
+ struct ext2_xattr_entry *here = NULL, *last = NULL;
size_t name_len, free, min_offs = sb->s_blocksize;
int not_found = 1, error;
char *end;
@@ -406,47 +448,39 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name,
le32_to_cpu(HDR(bh)->h_refcount));
header = HDR(bh);
end = bh->b_data + bh->b_size;
- if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- header->h_blocks != cpu_to_le32(1)) {
-bad_block: ext2_error(sb, "ext2_xattr_set",
+ if (!ext2_xattr_header_valid(header)) {
+bad_block:
+ ext2_error(sb, "ext2_xattr_set",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
- /* Find the named attribute. */
- here = FIRST_ENTRY(bh);
- while (!IS_LAST_ENTRY(here)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
- if ((char *)next >= end)
- goto bad_block;
- if (!here->e_value_block && here->e_value_size) {
- size_t offs = le16_to_cpu(here->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
- }
- not_found = name_index - here->e_name_index;
- if (!not_found)
- not_found = name_len - here->e_name_len;
- if (!not_found)
- not_found = memcmp(name, here->e_name,name_len);
- if (not_found <= 0)
- break;
- here = next;
- }
- last = here;
- /* We still need to compute min_offs and last. */
+ /*
+ * Find the named attribute. If not found, 'here' will point
+ * to entry where the new attribute should be inserted to
+ * maintain sorting.
+ */
+ last = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(last)) {
- struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
- if ((char *)next >= end)
+ if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize))
goto bad_block;
- if (!last->e_value_block && last->e_value_size) {
+ if (last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
- last = next;
+ if (not_found > 0) {
+ not_found = ext2_xattr_cmp_entry(name_index,
+ name_len,
+ name, last);
+ if (not_found <= 0)
+ here = last;
+ }
+ last = EXT2_XATTR_NEXT(last);
}
+ if (not_found > 0)
+ here = last;
/* Check whether we have enough space left. */
free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
@@ -454,7 +488,6 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* We will use a new extended attribute block. */
free = sb->s_blocksize -
sizeof(struct ext2_xattr_header) - sizeof(__u32);
- here = last = NULL; /* avoid gcc uninitialized warning. */
}
if (not_found) {
@@ -470,14 +503,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
error = -EEXIST;
if (flags & XATTR_CREATE)
goto cleanup;
- if (!here->e_value_block && here->e_value_size) {
- size_t size = le32_to_cpu(here->e_value_size);
-
- if (le16_to_cpu(here->e_value_offs) + size >
- sb->s_blocksize || size > sb->s_blocksize)
- goto bad_block;
- free += EXT2_XATTR_SIZE(size);
- }
+ free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size));
free += EXT2_XATTR_LEN(name_len);
}
error = -ENOSPC;
@@ -506,11 +532,10 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
unlock_buffer(bh);
ea_bdebug(bh, "cloning");
- header = kmalloc(bh->b_size, GFP_KERNEL);
+ header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
error = -ENOMEM;
if (header == NULL)
goto cleanup;
- memcpy(header, HDR(bh), bh->b_size);
header->h_refcount = cpu_to_le32(1);
offset = (char *)here - bh->b_data;
@@ -542,7 +567,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
here->e_name_len = name_len;
memcpy(here->e_name, name, name_len);
} else {
- if (!here->e_value_block && here->e_value_size) {
+ if (here->e_value_size) {
char *first_val = (char *)header + min_offs;
size_t offs = le16_to_cpu(here->e_value_offs);
char *val = (char *)header + offs;
@@ -569,7 +594,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
last = ENTRY(header+1);
while (!IS_LAST_ENTRY(last)) {
size_t o = le16_to_cpu(last->e_value_offs);
- if (!last->e_value_block && o < offs)
+ if (o < offs)
last->e_value_offs =
cpu_to_le16(o + size);
last = EXT2_XATTR_NEXT(last);
@@ -784,8 +809,7 @@ ext2_xattr_delete_inode(struct inode *inode)
goto cleanup;
}
ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
- if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
+ if (!ext2_xattr_header_valid(HDR(bh))) {
ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
"inode %ld: bad block %d", inode->i_ino,
EXT2_I(inode)->i_file_acl);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e5d6ee61ff48..0b202e00d93f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -603,9 +603,9 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
}
/**
- * ext4_should_retry_alloc()
+ * ext4_should_retry_alloc() - check if a block allocation should be retried
* @sb: super block
- * @retries number of attemps has been made
+ * @retries: number of attemps has been made
*
* ext4_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c7843b149a1e..86054f31fe4d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,6 +33,9 @@
static int ext4_dx_readdir(struct file *, struct dir_context *);
/**
+ * is_dx_dir() - check if a directory is using htree indexing
+ * @inode: directory inode
+ *
* Check if the given dir-inode refers to an htree-indexed directory
* (or a directory which could potentially get converted to use htree
* indexing).
@@ -109,7 +112,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
- int dir_has_error = 0;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
if (IS_ENCRYPTED(inode)) {
@@ -145,8 +147,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
return err;
}
- offset = ctx->pos & (sb->s_blocksize - 1);
-
while (ctx->pos < inode->i_size) {
struct ext4_map_blocks map;
@@ -155,9 +155,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
goto errout;
}
cond_resched();
+ offset = ctx->pos & (sb->s_blocksize - 1);
map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
map.m_len = 1;
err = ext4_map_blocks(NULL, inode, &map, 0);
+ if (err == 0) {
+ /* m_len should never be zero but let's avoid
+ * an infinite loop if it somehow is */
+ if (map.m_len == 0)
+ map.m_len = 1;
+ ctx->pos += map.m_len * sb->s_blocksize;
+ continue;
+ }
if (err > 0) {
pgoff_t index = map.m_pblk >>
(PAGE_SHIFT - inode->i_blkbits);
@@ -176,13 +185,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
}
if (!bh) {
- if (!dir_has_error) {
- EXT4_ERROR_FILE(file, 0,
- "directory contains a "
- "hole at offset %llu",
- (unsigned long long) ctx->pos);
- dir_has_error = 1;
- }
/* corrupt size? Maybe no more blocks to read */
if (ctx->pos > inode->i_blocks << 9)
break;
@@ -192,8 +194,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
/* Check the checksum */
if (!buffer_verified(bh) &&
- !ext4_dirent_csum_verify(inode,
- (struct ext4_dir_entry *)bh->b_data)) {
+ !ext4_dirblock_csum_verify(inode, bh)) {
EXT4_ERROR_FILE(file, 0, "directory fails checksum "
"at offset %llu",
(unsigned long long)ctx->pos);
@@ -674,7 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
return memcmp(str, name->name, len);
}
- return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
+ return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false);
}
static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1cb67859e051..bf660aa7a9e0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -421,7 +421,8 @@ struct flex_groups {
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL))
+#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
+ EXT4_PROJINHERIT_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
@@ -2077,6 +2078,9 @@ struct ext4_filename {
#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_str crypto_buf;
#endif
+#ifdef CONFIG_UNICODE
+ struct fscrypt_str cf_name;
+#endif
};
#define fname_name(p) ((p)->disk_name.name)
@@ -2302,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
+#ifdef CONFIG_UNICODE
+extern void ext4_fname_setup_ci_filename(struct inode *dir,
+ const struct qstr *iname,
+ struct fscrypt_str *fname);
+#endif
+
#ifdef CONFIG_FS_ENCRYPTION
static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst,
const struct fscrypt_name *src)
@@ -2328,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
return 0;
}
@@ -2343,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return err;
ext4_fname_from_fscrypt_name(fname, &name);
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
+#endif
return 0;
}
@@ -2356,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname)
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
+
+#ifdef CONFIG_UNICODE
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
}
#else /* !CONFIG_FS_ENCRYPTION */
static inline int ext4_fname_setup_filename(struct inode *dir,
@@ -2366,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
+
+#ifdef CONFIG_UNICODE
+ ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+#endif
+
return 0;
}
@@ -2376,7 +2404,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir,
return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname);
}
-static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
+static inline void ext4_fname_free_filename(struct ext4_filename *fname)
+{
+#ifdef CONFIG_UNICODE
+ kfree(fname->cf_name.name);
+ fname->cf_name.name = NULL;
+#endif
+}
#endif /* !CONFIG_FS_ENCRYPTION */
/* dir.c */
@@ -2568,8 +2602,8 @@ extern int ext4_ext_migrate(struct inode *);
extern int ext4_ind_migrate(struct inode *inode);
/* namei.c */
-extern int ext4_dirent_csum_verify(struct inode *inode,
- struct ext4_dir_entry *dirent);
+extern int ext4_dirblock_csum_verify(struct inode *inode,
+ struct buffer_head *bh);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@@ -3070,11 +3104,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle,
extern int ext4_read_inline_dir(struct file *filp,
struct dir_context *ctx,
int *has_inline_data);
-extern int htree_inlinedir_to_tree(struct file *dir_file,
- struct inode *dir, ext4_lblk_t block,
- struct dx_hash_info *hinfo,
- __u32 start_hash, __u32 start_minor_hash,
- int *has_inline_data);
+extern int ext4_inlinedir_to_tree(struct file *dir_file,
+ struct inode *dir, ext4_lblk_t block,
+ struct dx_hash_info *hinfo,
+ __u32 start_hash, __u32 start_minor_hash,
+ int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir,
@@ -3113,14 +3147,13 @@ extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
struct ext4_dir_entry_2 *de,
int blocksize, int csum_size,
unsigned int parent_ino, int dotdot_real_len);
-extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
- unsigned int blocksize);
-extern int ext4_handle_dirty_dirent_node(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh);
+extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
+ unsigned int blocksize);
+extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
+ struct buffer_head *bh);
extern int ext4_ci_compare(const struct inode *parent,
- const struct qstr *name,
- const struct qstr *entry);
+ const struct qstr *fname,
+ const struct qstr *entry, bool quick);
#define S_SHIFT 12
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 75a5309f2231..ef8fcf7d0d3b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -361,20 +361,20 @@ static inline int ext4_journal_force_commit(journal_t *journal)
}
static inline int ext4_jbd2_inode_add_write(handle_t *handle,
- struct inode *inode)
+ struct inode *inode, loff_t start_byte, loff_t length)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_inode_add_write(handle,
- EXT4_I(inode)->jinode);
+ return jbd2_journal_inode_ranged_write(handle,
+ EXT4_I(inode)->jinode, start_byte, length);
return 0;
}
static inline int ext4_jbd2_inode_add_wait(handle_t *handle,
- struct inode *inode)
+ struct inode *inode, loff_t start_byte, loff_t length)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_inode_add_wait(handle,
- EXT4_I(inode)->jinode);
+ return jbd2_journal_inode_ranged_wait(handle,
+ EXT4_I(inode)->jinode, start_byte, length);
return 0;
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d40ed940001e..92266a2da7d6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5676,8 +5676,8 @@ out_mutex:
}
/**
- * ext4_swap_extents - Swap extents between two inodes
- *
+ * ext4_swap_extents() - Swap extents between two inodes
+ * @handle: handle for this transaction
* @inode1: First inode
* @inode2: Second inode
* @lblk1: Start block for first inode
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 023a3eb3afa3..7521de2dcf3a 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1317,7 +1317,6 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
if (!es)
goto out_wrap;
- node = &es->rb_node;
while (*nr_to_scan > 0) {
if (es->es_lblk > end) {
ei->i_es_shrink_lblk = end + 1;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2c5baa5e8291..f4a24a46245e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
ret = generic_write_checks(iocb, from);
if (ret <= 0)
return ret;
+
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
/*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 2024d3fa5504..36699a131168 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -294,14 +294,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
}
/**
- * ext4_alloc_branch - allocate and set up a chain of blocks.
- * @handle: handle for this transaction
- * @inode: owner
- * @indirect_blks: number of allocated indirect blocks
- * @blks: number of allocated direct blocks
- * @goal: preferred place for allocation
- * @offsets: offsets (in the blocks) to store the pointers to next.
- * @branch: place to store the chain in.
+ * ext4_alloc_branch() - allocate and set up a chain of blocks
+ * @handle: handle for this transaction
+ * @ar: structure describing the allocation request
+ * @indirect_blks: number of allocated indirect blocks
+ * @offsets: offsets (in the blocks) to store the pointers to next.
+ * @branch: place to store the chain in.
*
* This function allocates blocks, zeroes out all but the last one,
* links them into chain and (if we are synchronous) writes them to disk.
@@ -396,15 +394,11 @@ failed:
}
/**
- * ext4_splice_branch - splice the allocated branch onto inode.
+ * ext4_splice_branch() - splice the allocated branch onto inode.
* @handle: handle for this transaction
- * @inode: owner
- * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- * ext4_alloc_branch)
+ * @ar: structure describing the allocation request
* @where: location of missing link
* @num: number of indirect blocks we are adding
- * @blks: number of direct blocks we are adding
*
* This function fills the missing link and does all housekeeping needed in
* inode (->i_blocks, etc.). In case of success we end up with the full
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index f73bc3925282..88cdf3c90bd1 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1132,7 +1132,6 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
{
int err, csum_size = 0, header_size = 0;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
void *target = dir_block->b_data;
/*
@@ -1158,13 +1157,11 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
inode->i_sb->s_blocksize - csum_size);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(dir_block->b_data,
- inode->i_sb->s_blocksize);
- initialize_dirent_tail(t, inode->i_sb->s_blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(dir_block,
+ inode->i_sb->s_blocksize);
set_buffer_uptodate(dir_block);
- err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
+ err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
return err;
set_buffer_verified(dir_block);
@@ -1327,11 +1324,11 @@ out:
* inlined dir. It returns the number directory entries loaded
* into the tree. If there is an error it is returned in err.
*/
-int htree_inlinedir_to_tree(struct file *dir_file,
- struct inode *dir, ext4_lblk_t block,
- struct dx_hash_info *hinfo,
- __u32 start_hash, __u32 start_minor_hash,
- int *has_inline_data)
+int ext4_inlinedir_to_tree(struct file *dir_file,
+ struct inode *dir, ext4_lblk_t block,
+ struct dx_hash_info *hinfo,
+ __u32 start_hash, __u32 start_minor_hash,
+ int *has_inline_data)
{
int err = 0, count = 0;
unsigned int parent_ino;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c7f77c643008..420fe3deed39 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -731,10 +731,16 @@ out_sem:
!(flags & EXT4_GET_BLOCKS_ZERO) &&
!ext4_is_quota_file(inode) &&
ext4_should_order_data(inode)) {
+ loff_t start_byte =
+ (loff_t)map->m_lblk << inode->i_blkbits;
+ loff_t length = (loff_t)map->m_len << inode->i_blkbits;
+
if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
- ret = ext4_jbd2_inode_add_wait(handle, inode);
+ ret = ext4_jbd2_inode_add_wait(handle, inode,
+ start_byte, length);
else
- ret = ext4_jbd2_inode_add_write(handle, inode);
+ ret = ext4_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
if (ret)
return ret;
}
@@ -1164,8 +1170,9 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
int err = 0;
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned bbits;
- struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
- bool decrypt = false;
+ struct buffer_head *bh, *head, *wait[2];
+ int nr_wait = 0;
+ int i;
BUG_ON(!PageLocked(page));
BUG_ON(from > PAGE_SIZE);
@@ -1217,23 +1224,32 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
ll_rw_block(REQ_OP_READ, 0, 1, &bh);
- *wait_bh++ = bh;
- decrypt = IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode);
+ wait[nr_wait++] = bh;
}
}
/*
* If we issued read requests, let them complete.
*/
- while (wait_bh > wait) {
- wait_on_buffer(*--wait_bh);
- if (!buffer_uptodate(*wait_bh))
+ for (i = 0; i < nr_wait; i++) {
+ wait_on_buffer(wait[i]);
+ if (!buffer_uptodate(wait[i]))
err = -EIO;
}
- if (unlikely(err))
+ if (unlikely(err)) {
page_zero_new_buffers(page, from, to);
- else if (decrypt)
- err = fscrypt_decrypt_page(page->mapping->host, page,
- PAGE_SIZE, 0, page->index);
+ } else if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) {
+ for (i = 0; i < nr_wait; i++) {
+ int err2;
+
+ err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize,
+ bh_offset(wait[i]));
+ if (err2) {
+ clear_buffer_uptodate(wait[i]);
+ err = err2;
+ }
+ }
+ }
+
return err;
}
#endif
@@ -4065,9 +4081,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
/* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode));
- BUG_ON(blocksize != PAGE_SIZE);
- WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
- page, PAGE_SIZE, 0, page->index));
+ WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks(
+ page, blocksize, bh_offset(bh)));
}
}
if (ext4_should_journal_data(inode)) {
@@ -4085,7 +4100,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
err = 0;
mark_buffer_dirty(bh);
if (ext4_should_order_data(inode))
- err = ext4_jbd2_inode_add_write(handle, inode);
+ err = ext4_jbd2_inode_add_write(handle, inode, from,
+ length);
}
unlock:
@@ -4570,6 +4586,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
struct buffer_head *bh;
struct super_block *sb = inode->i_sb;
ext4_fsblk_t block;
+ struct blk_plug plug;
int inodes_per_block, inode_offset;
iloc->bh = NULL;
@@ -4658,6 +4675,7 @@ make_io:
* If we need to do any I/O, try to pre-readahead extra
* blocks from the inode table.
*/
+ blk_start_plug(&plug);
if (EXT4_SB(sb)->s_inode_readahead_blks) {
ext4_fsblk_t b, end, table;
unsigned num;
@@ -4688,6 +4706,7 @@ make_io:
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+ blk_finish_plug(&plug);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE_BLOCK(inode, block,
@@ -5520,6 +5539,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
+ if (unlikely(IS_APPEND(inode) &&
+ (ia_valid & (ATTR_MODE | ATTR_UID |
+ ATTR_GID | ATTR_TIMES_SET))))
+ return -EPERM;
+
error = setattr_prepare(dentry, attr);
if (error)
return error;
@@ -5571,7 +5598,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_SIZE) {
handle_t *handle;
loff_t oldsize = inode->i_size;
- int shrink = (attr->ia_size <= inode->i_size);
+ int shrink = (attr->ia_size < inode->i_size);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -5585,18 +5612,33 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
inode_inc_iversion(inode);
- if (ext4_should_order_data(inode) &&
- (attr->ia_size < inode->i_size)) {
- error = ext4_begin_ordered_truncate(inode,
+ if (shrink) {
+ if (ext4_should_order_data(inode)) {
+ error = ext4_begin_ordered_truncate(inode,
attr->ia_size);
- if (error)
- goto err_out;
+ if (error)
+ goto err_out;
+ }
+ /*
+ * Blocks are going to be removed from the inode. Wait
+ * for dio in flight.
+ */
+ inode_dio_wait(inode);
+ }
+
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ rc = ext4_break_layouts(inode);
+ if (rc) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ return rc;
}
+
if (attr->ia_size != inode->i_size) {
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
- goto err_out;
+ goto out_mmap_sem;
}
if (ext4_handle_valid(handle) && shrink) {
error = ext4_orphan_add(handle, inode);
@@ -5624,42 +5666,31 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
i_size_write(inode, attr->ia_size);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
- if (error) {
- if (orphan && inode->i_nlink)
- ext4_orphan_del(NULL, inode);
- goto err_out;
+ if (error)
+ goto out_mmap_sem;
+ if (!shrink) {
+ pagecache_isize_extended(inode, oldsize,
+ inode->i_size);
+ } else if (ext4_should_journal_data(inode)) {
+ ext4_wait_for_tail_page_commit(inode);
}
}
- if (!shrink) {
- pagecache_isize_extended(inode, oldsize, inode->i_size);
- } else {
- /*
- * Blocks are going to be removed from the inode. Wait
- * for dio in flight.
- */
- inode_dio_wait(inode);
- }
- if (orphan && ext4_should_journal_data(inode))
- ext4_wait_for_tail_page_commit(inode);
- down_write(&EXT4_I(inode)->i_mmap_sem);
-
- rc = ext4_break_layouts(inode);
- if (rc) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
- error = rc;
- goto err_out;
- }
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, inode->i_size);
- if (shrink) {
+ /*
+ * Call ext4_truncate() even if i_size didn't change to
+ * truncate possible preallocated blocks.
+ */
+ if (attr->ia_size <= oldsize) {
rc = ext4_truncate(inode);
if (rc)
error = rc;
}
+out_mmap_sem:
up_write(&EXT4_I(inode)->i_mmap_sem);
}
@@ -6190,6 +6221,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
get_block_t *get_block;
int retries = 0;
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return VM_FAULT_SIGBUS;
+
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e486e49b31ed..442f7ef873fc 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16])
}
#endif
+/*
+ * If immutable is set and we are not clearing it, we're not allowed to change
+ * anything else in the inode. Don't error out if we're only trying to set
+ * immutable on an immutable file.
+ */
+static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
+ unsigned int flags)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned int oldflags = ei->i_flags;
+
+ if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL))
+ return 0;
+
+ if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL))
+ return -EPERM;
+ if (ext4_has_feature_project(inode->i_sb) &&
+ __kprojid_val(ei->i_projid) != new_projid)
+ return -EPERM;
+
+ return 0;
+}
+
static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
{
@@ -289,16 +312,9 @@ static int ext4_ioctl_setflags(struct inode *inode,
/* The JOURNAL_DATA flag is modifiable only by root */
jflag = flags & EXT4_JOURNAL_DATA_FL;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- *
- * This test looks nicer. Thanks to Pauline Middelink
- */
- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE))
- goto flags_out;
- }
+ err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (err)
+ goto flags_out;
/*
* The JOURNAL_DATA flag can only be changed by
@@ -340,6 +356,20 @@ static int ext4_ioctl_setflags(struct inode *inode,
}
}
+ /*
+ * Wait for all pending directio and then flush all the dirty pages
+ * for this file. The flush marks all the pages readonly, so any
+ * subsequent attempt to write to the file (particularly mmap pages)
+ * will come through the filesystem and fail.
+ */
+ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) &&
+ (flags & EXT4_IMMUTABLE_FL)) {
+ inode_dio_wait(inode);
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto flags_out;
+ }
+
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
@@ -704,28 +734,15 @@ group_add_out:
return err;
}
-static int ext4_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
+static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa)
{
- /*
- * Project Quota ID state is only allowed to change from within the init
- * namespace. Enforce that restriction only if we are trying to change
- * the quota ID state. Everything else is allowed in user namespaces.
- */
- if (current_user_ns() == &init_user_ns)
- return 0;
-
- if (__kprojid_val(EXT4_I(inode)->i_projid) != fa->fsx_projid)
- return -EINVAL;
+ struct ext4_inode_info *ei = EXT4_I(inode);
- if (ext4_test_inode_flag(inode, EXT4_INODE_PROJINHERIT)) {
- if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
- return -EINVAL;
- } else {
- if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
- return -EINVAL;
- }
+ simple_fill_fsxattr(fa, ext4_iflags_to_xflags(ei->i_flags &
+ EXT4_FL_USER_VISIBLE));
- return 0;
+ if (ext4_has_feature_project(inode->i_sb))
+ fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid);
}
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
@@ -742,6 +759,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return ext4_ioc_getfsmap(sb, (void __user *)arg);
case EXT4_IOC_GETFLAGS:
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
+ if (S_ISREG(inode->i_mode))
+ flags &= ~EXT4_PROJINHERIT_FL;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
int err;
@@ -769,7 +788,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return err;
inode_lock(inode);
- err = ext4_ioctl_setflags(inode, flags);
+ err = ext4_ioctl_check_immutable(inode,
+ from_kprojid(&init_user_ns, ei->i_projid),
+ flags);
+ if (!err)
+ err = ext4_ioctl_setflags(inode, flags);
inode_unlock(inode);
mnt_drop_write_file(filp);
return err;
@@ -1096,13 +1119,7 @@ resizefs_out:
{
struct fsxattr fa;
- memset(&fa, 0, sizeof(struct fsxattr));
- fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
-
- if (ext4_has_feature_project(inode->i_sb)) {
- fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
- EXT4_I(inode)->i_projid);
- }
+ ext4_fill_fsxattr(inode, &fa);
if (copy_to_user((struct fsxattr __user *)arg,
&fa, sizeof(fa)))
@@ -1111,7 +1128,7 @@ resizefs_out:
}
case EXT4_IOC_FSSETXATTR:
{
- struct fsxattr fa;
+ struct fsxattr fa, old_fa;
int err;
if (copy_from_user(&fa, (struct fsxattr __user *)arg,
@@ -1134,11 +1151,15 @@ resizefs_out:
return err;
inode_lock(inode);
- err = ext4_ioctl_check_project(inode, &fa);
+ ext4_fill_fsxattr(inode, &old_fa);
+ err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
if (err)
goto out;
flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
(flags & EXT4_FL_XFLAG_VISIBLE);
+ err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags);
+ if (err)
+ goto out;
err = ext4_ioctl_setflags(inode, flags);
if (err)
goto out;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 99ba720dbb7a..a3e2767bdf2f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4696,8 +4696,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* ext4_free_blocks() -- Free given blocks and update quota
* @handle: handle for this transaction
* @inode: inode
- * @block: start physical block to free
- * @count: number of blocks to count
+ * @bh: optional buffer of the block to be freed
+ * @block: starting physical block to be freed
+ * @count: number of blocks to be freed
* @flags: flags used by ext4_free_blocks
*/
void ext4_free_blocks(handle_t *handle, struct inode *inode,
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 1083a9f3f16a..30ce3dc69378 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -13,11 +13,10 @@
#include "ext4_extents.h"
/**
- * get_ext_path - Find an extent path for designated logical block number.
- *
- * @inode: an inode which is searched
+ * get_ext_path() - Find an extent path for designated logical block number.
+ * @inode: inode to be searched
* @lblock: logical block number to find an extent path
- * @path: pointer to an extent path pointer (for output)
+ * @ppath: pointer to an extent path pointer (for output)
*
* ext4_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
@@ -42,8 +41,9 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
}
/**
- * ext4_double_down_write_data_sem - Acquire two inodes' write lock
- * of i_data_sem
+ * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem
+ * @first: inode to be locked
+ * @second: inode to be locked
*
* Acquire write lock of i_data_sem of the two inodes
*/
@@ -390,7 +390,8 @@ data_copy:
/* Even in case of data=writeback it is reasonable to pin
* inode to transaction, to prevent unexpected data loss */
- *err = ext4_jbd2_inode_add_write(handle, orig_inode);
+ *err = ext4_jbd2_inode_add_write(handle, orig_inode,
+ (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
unlock_pages:
unlock_page(pagep[0]);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cd01c4a67ffb..129029534075 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -82,8 +82,18 @@ static struct buffer_head *ext4_append(handle_t *handle,
static int ext4_dx_csum_verify(struct inode *inode,
struct ext4_dir_entry *dirent);
+/*
+ * Hints to ext4_read_dirblock regarding whether we expect a directory
+ * block being read to be an index block, or a block containing
+ * directory entries (and if the latter, whether it was found via a
+ * logical block in an htree index block). This is used to control
+ * what sort of sanity checkinig ext4_read_dirblock() will do on the
+ * directory block read from the storage device. EITHER will means
+ * the caller doesn't know what kind of directory block will be read,
+ * so no specific verification will be done.
+ */
typedef enum {
- EITHER, INDEX, DIRENT
+ EITHER, INDEX, DIRENT, DIRENT_HTREE
} dirblock_type_t;
#define ext4_read_dirblock(inode, block, type) \
@@ -109,11 +119,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
return bh;
}
- if (!bh) {
+ if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
ext4_error_inode(inode, func, line, block,
- "Directory hole found");
+ "Directory hole found for htree %s block",
+ (type == INDEX) ? "index" : "leaf");
return ERR_PTR(-EFSCORRUPTED);
}
+ if (!bh)
+ return NULL;
dirent = (struct ext4_dir_entry *) bh->b_data;
/* Determine whether or not we have an index block */
if (is_dx(inode)) {
@@ -150,7 +163,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
}
}
if (!is_dx_block) {
- if (ext4_dirent_csum_verify(inode, dirent))
+ if (ext4_dirblock_csum_verify(inode, bh))
set_buffer_verified(bh);
else {
ext4_error_inode(inode, func, line, block,
@@ -280,9 +293,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
struct inode *dir, struct inode *inode);
/* checksumming functions */
-void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
- unsigned int blocksize)
+void ext4_initialize_dirent_tail(struct buffer_head *bh,
+ unsigned int blocksize)
{
+ struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
+
memset(t, 0, sizeof(struct ext4_dir_entry_tail));
t->det_rec_len = ext4_rec_len_to_disk(
sizeof(struct ext4_dir_entry_tail), blocksize);
@@ -291,17 +306,17 @@ void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
/* Walk through a dirent block to find a checksum "dirent" at the tail */
static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
- struct ext4_dir_entry *de)
+ struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
#ifdef PARANOID
struct ext4_dir_entry *d, *top;
- d = de;
- top = (struct ext4_dir_entry *)(((void *)de) +
+ d = (struct ext4_dir_entry *)bh->b_data;
+ top = (struct ext4_dir_entry *)(bh->b_data +
(EXT4_BLOCK_SIZE(inode->i_sb) -
- sizeof(struct ext4_dir_entry_tail)));
+ sizeof(struct ext4_dir_entry_tail)));
while (d < top && d->rec_len)
d = (struct ext4_dir_entry *)(((void *)d) +
le16_to_cpu(d->rec_len));
@@ -311,7 +326,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
t = (struct ext4_dir_entry_tail *)d;
#else
- t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
+ t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb));
#endif
if (t->det_reserved_zero1 ||
@@ -323,8 +338,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
return t;
}
-static __le32 ext4_dirent_csum(struct inode *inode,
- struct ext4_dir_entry *dirent, int size)
+static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -344,49 +358,49 @@ static void __warn_no_space_for_csum(struct inode *inode, const char *func,
"No space for directory leaf checksum. Please run e2fsck -D.");
}
-int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
+int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
if (!ext4_has_metadata_csum(inode->i_sb))
return 1;
- t = get_dirent_tail(inode, dirent);
+ t = get_dirent_tail(inode, bh);
if (!t) {
warn_no_space_for_csum(inode);
return 0;
}
- if (t->det_checksum != ext4_dirent_csum(inode, dirent,
- (void *)t - (void *)dirent))
+ if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data,
+ (char *)t - bh->b_data))
return 0;
return 1;
}
-static void ext4_dirent_csum_set(struct inode *inode,
- struct ext4_dir_entry *dirent)
+static void ext4_dirblock_csum_set(struct inode *inode,
+ struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
if (!ext4_has_metadata_csum(inode->i_sb))
return;
- t = get_dirent_tail(inode, dirent);
+ t = get_dirent_tail(inode, bh);
if (!t) {
warn_no_space_for_csum(inode);
return;
}
- t->det_checksum = ext4_dirent_csum(inode, dirent,
- (void *)t - (void *)dirent);
+ t->det_checksum = ext4_dirblock_csum(inode, bh->b_data,
+ (char *)t - bh->b_data);
}
-int ext4_handle_dirty_dirent_node(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh)
+int ext4_handle_dirty_dirblock(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *bh)
{
- ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
+ ext4_dirblock_csum_set(inode, bh);
return ext4_handle_dirty_metadata(handle, inode, bh);
}
@@ -980,7 +994,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block));
- bh = ext4_read_dirblock(dir, block, DIRENT);
+ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
if (IS_ERR(bh))
return PTR_ERR(bh);
@@ -1090,10 +1104,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
if (ext4_has_inline_data(dir)) {
int has_inline_data = 1;
- count = htree_inlinedir_to_tree(dir_file, dir, 0,
- &hinfo, start_hash,
- start_minor_hash,
- &has_inline_data);
+ count = ext4_inlinedir_to_tree(dir_file, dir, 0,
+ &hinfo, start_hash,
+ start_minor_hash,
+ &has_inline_data);
if (has_inline_data) {
*next_hash = ~0;
return count;
@@ -1259,19 +1273,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
#ifdef CONFIG_UNICODE
/*
* Test whether a case-insensitive directory entry matches the filename
- * being searched for.
+ * being searched for. If quick is set, assume the name being looked up
+ * is already in the casefolded form.
*
* Returns: 0 if the directory entry matches, more than 0 if it
* doesn't match or less than zero on error.
*/
int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
- const struct qstr *entry)
+ const struct qstr *entry, bool quick)
{
const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
const struct unicode_map *um = sbi->s_encoding;
int ret;
- ret = utf8_strncasecmp(um, name, entry);
+ if (quick)
+ ret = utf8_strncasecmp_folded(um, name, entry);
+ else
+ ret = utf8_strncasecmp(um, name, entry);
+
if (ret < 0) {
/* Handle invalid character sequence as either an error
* or as an opaque byte sequence.
@@ -1287,6 +1306,32 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
return ret;
}
+
+void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+ struct fscrypt_str *cf_name)
+{
+ int len;
+
+ if (!IS_CASEFOLDED(dir)) {
+ cf_name->name = NULL;
+ return;
+ }
+
+ cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
+ if (!cf_name->name)
+ return;
+
+ len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding,
+ iname, cf_name->name,
+ EXT4_NAME_LEN);
+ if (len <= 0) {
+ kfree(cf_name->name);
+ cf_name->name = NULL;
+ return;
+ }
+ cf_name->len = (unsigned) len;
+
+}
#endif
/*
@@ -1313,8 +1358,15 @@ static inline bool ext4_match(const struct inode *parent,
#endif
#ifdef CONFIG_UNICODE
- if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent))
- return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0);
+ if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
+ if (fname->cf_name.name) {
+ struct qstr cf = {.name = fname->cf_name.name,
+ .len = fname->cf_name.len};
+ return !ext4_ci_compare(parent, &cf, &entry, true);
+ }
+ return !ext4_ci_compare(parent, fname->usr_fname, &entry,
+ false);
+ }
#endif
return fscrypt_match_name(&f, de->name, de->name_len);
@@ -1484,8 +1536,7 @@ restart:
if (!buffer_verified(bh) &&
!is_dx_internal_node(dir, block,
(struct ext4_dir_entry *)bh->b_data) &&
- !ext4_dirent_csum_verify(dir,
- (struct ext4_dir_entry *)bh->b_data)) {
+ !ext4_dirblock_csum_verify(dir, bh)) {
EXT4_ERROR_INODE(dir, "checksumming directory "
"block %lu", (unsigned long)block);
brelse(bh);
@@ -1586,7 +1637,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
return (struct buffer_head *) frame;
do {
block = dx_get_block(frame->at);
- bh = ext4_read_dirblock(dir, block, DIRENT);
+ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
if (IS_ERR(bh))
goto errout;
@@ -1769,7 +1820,6 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size;
struct ext4_dir_entry_2 *de = NULL, *de2;
- struct ext4_dir_entry_tail *t;
int csum_size = 0;
int err = 0, i;
@@ -1830,11 +1880,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
(char *) de2,
blocksize);
if (csum_size) {
- t = EXT4_DIRENT_TAIL(data2, blocksize);
- initialize_dirent_tail(t, blocksize);
-
- t = EXT4_DIRENT_TAIL(data1, blocksize);
- initialize_dirent_tail(t, blocksize);
+ ext4_initialize_dirent_tail(*bh, blocksize);
+ ext4_initialize_dirent_tail(bh2, blocksize);
}
dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
@@ -1848,7 +1895,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
de = de2;
}
dx_insert_block(frame, hash2 + continued, newblock);
- err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (err)
goto journal_error;
err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
@@ -1976,7 +2023,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
inode_inc_iversion(dir);
ext4_mark_inode_dirty(handle, dir);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh);
if (err)
ext4_std_error(dir->i_sb, err);
return 0;
@@ -1995,8 +2042,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries;
struct ext4_dir_entry_2 *de, *de2;
- struct ext4_dir_entry_tail *t;
- char *data1, *top;
+ char *data2, *top;
unsigned len;
int retval;
unsigned blocksize;
@@ -2036,21 +2082,18 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
return PTR_ERR(bh2);
}
ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
- data1 = bh2->b_data;
+ data2 = bh2->b_data;
- memcpy (data1, de, len);
- de = (struct ext4_dir_entry_2 *) data1;
- top = data1 + len;
+ memcpy(data2, de, len);
+ de = (struct ext4_dir_entry_2 *) data2;
+ top = data2 + len;
while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
de = de2;
- de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
- (char *) de,
- blocksize);
+ de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
+ (char *) de, blocksize);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(data1, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(bh2, blocksize);
/* Initialize the root; the dot dirents already exist */
de = (struct ext4_dir_entry_2 *) (&root->dotdot);
@@ -2080,7 +2123,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (retval)
goto out_frames;
- retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
+ retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (retval)
goto out_frames;
@@ -2120,7 +2163,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
struct super_block *sb;
struct ext4_sb_info *sbi;
struct ext4_filename fname;
@@ -2170,6 +2212,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0; block < blocks; block++) {
bh = ext4_read_dirblock(dir, block, DIRENT);
+ if (bh == NULL) {
+ bh = ext4_bread(handle, dir, block,
+ EXT4_GET_BLOCKS_CREATE);
+ goto add_to_new_block;
+ }
if (IS_ERR(bh)) {
retval = PTR_ERR(bh);
bh = NULL;
@@ -2190,6 +2237,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
brelse(bh);
}
bh = ext4_append(handle, dir, &block);
+add_to_new_block:
if (IS_ERR(bh)) {
retval = PTR_ERR(bh);
bh = NULL;
@@ -2199,10 +2247,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
de->inode = 0;
de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(bh, blocksize);
retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
out:
@@ -2234,7 +2280,7 @@ again:
return PTR_ERR(frame);
entries = frame->entries;
at = frame->at;
- bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
+ bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
bh = NULL;
@@ -2460,7 +2506,7 @@ static int ext4_delete_entry(handle_t *handle,
goto out;
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ err = ext4_handle_dirty_dirblock(handle, dir, bh);
if (unlikely(err))
goto out;
@@ -2662,7 +2708,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
{
struct buffer_head *dir_block = NULL;
struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
ext4_lblk_t block = 0;
unsigned int blocksize = dir->i_sb->s_blocksize;
int csum_size = 0;
@@ -2686,13 +2731,11 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
de = (struct ext4_dir_entry_2 *)dir_block->b_data;
ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
set_nlink(inode, 2);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
- initialize_dirent_tail(t, blocksize);
- }
+ if (csum_size)
+ ext4_initialize_dirent_tail(dir_block, blocksize);
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
+ err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
goto out;
set_buffer_verified(dir_block);
@@ -2782,7 +2825,10 @@ bool ext4_empty_dir(struct inode *inode)
EXT4_ERROR_INODE(inode, "invalid size");
return true;
}
- bh = ext4_read_dirblock(inode, 0, EITHER);
+ /* The first directory block must not be a hole,
+ * so treat it as DIRENT_HTREE
+ */
+ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh))
return true;
@@ -2804,6 +2850,10 @@ bool ext4_empty_dir(struct inode *inode)
brelse(bh);
lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
bh = ext4_read_dirblock(inode, lblock, EITHER);
+ if (bh == NULL) {
+ offset += sb->s_blocksize;
+ continue;
+ }
if (IS_ERR(bh))
return true;
de = (struct ext4_dir_entry_2 *) bh->b_data;
@@ -3369,7 +3419,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
struct buffer_head *bh;
if (!ext4_has_inline_data(inode)) {
- bh = ext4_read_dirblock(inode, 0, EITHER);
+ /* The first directory block must not be a hole, so
+ * treat it as DIRENT_HTREE
+ */
+ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
if (IS_ERR(bh)) {
*retval = PTR_ERR(bh);
return NULL;
@@ -3430,9 +3483,8 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
ent->inode,
ent->dir_bh);
} else {
- retval = ext4_handle_dirty_dirent_node(handle,
- ent->inode,
- ent->dir_bh);
+ retval = ext4_handle_dirty_dirblock(handle, ent->inode,
+ ent->dir_bh);
}
} else {
retval = ext4_mark_inode_dirty(handle, ent->inode);
@@ -3462,8 +3514,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
ext4_mark_inode_dirty(handle, ent->dir);
BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
if (!ent->inlined) {
- retval = ext4_handle_dirty_dirent_node(handle,
- ent->dir, ent->bh);
+ retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
if (unlikely(retval)) {
ext4_std_error(ent->dir->i_sb, retval);
return retval;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 56e287f5ee50..12ceadef32c5 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -66,9 +66,7 @@ static void ext4_finish_bio(struct bio *bio)
bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
-#ifdef CONFIG_FS_ENCRYPTION
- struct page *data_page = NULL;
-#endif
+ struct page *bounce_page = NULL;
struct buffer_head *bh, *head;
unsigned bio_start = bvec->bv_offset;
unsigned bio_end = bio_start + bvec->bv_len;
@@ -78,13 +76,10 @@ static void ext4_finish_bio(struct bio *bio)
if (!page)
continue;
-#ifdef CONFIG_FS_ENCRYPTION
- if (!page->mapping) {
- /* The bounce data pages are unmapped. */
- data_page = page;
- fscrypt_pullback_bio_page(&page, false);
+ if (fscrypt_is_bounce_page(page)) {
+ bounce_page = page;
+ page = fscrypt_pagecache_page(bounce_page);
}
-#endif
if (bio->bi_status) {
SetPageError(page);
@@ -111,10 +106,7 @@ static void ext4_finish_bio(struct bio *bio)
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
local_irq_restore(flags);
if (!under_io) {
-#ifdef CONFIG_FS_ENCRYPTION
- if (data_page)
- fscrypt_restore_control_page(data_page);
-#endif
+ fscrypt_free_bounce_page(bounce_page);
end_page_writeback(page);
}
}
@@ -415,7 +407,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
struct writeback_control *wbc,
bool keep_towrite)
{
- struct page *data_page = NULL;
+ struct page *bounce_page = NULL;
struct inode *inode = page->mapping->host;
unsigned block_start;
struct buffer_head *bh, *head;
@@ -475,14 +467,22 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
bh = head = page_buffers(page);
+ /*
+ * If any blocks are being written to an encrypted file, encrypt them
+ * into a bounce page. For simplicity, just encrypt until the last
+ * block which might be needed. This may cause some unneeded blocks
+ * (e.g. holes) to be unnecessarily encrypted, but this is rare and
+ * can't happen in the common case of blocksize == PAGE_SIZE.
+ */
if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && nr_to_submit) {
gfp_t gfp_flags = GFP_NOFS;
+ unsigned int enc_bytes = round_up(len, i_blocksize(inode));
retry_encrypt:
- data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
- page->index, gfp_flags);
- if (IS_ERR(data_page)) {
- ret = PTR_ERR(data_page);
+ bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes,
+ 0, gfp_flags);
+ if (IS_ERR(bounce_page)) {
+ ret = PTR_ERR(bounce_page);
if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
if (io->io_bio) {
ext4_io_submit(io);
@@ -491,7 +491,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
- data_page = NULL;
+ bounce_page = NULL;
goto out;
}
}
@@ -500,8 +500,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- ret = io_submit_add_bh(io, inode,
- data_page ? data_page : page, bh);
+ ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
@@ -517,8 +516,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
/* Error stopped previous loop? Clean up buffers... */
if (ret) {
out:
- if (data_page)
- fscrypt_restore_control_page(data_page);
+ fscrypt_free_bounce_page(bounce_page);
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
redirty_page_for_writepage(wbc, page);
do {
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 04b4f53f0659..b3cd7655a6ff 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -230,6 +230,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(journal_task),
NULL,
};
+ATTRIBUTE_GROUPS(ext4);
/* Features this copy of ext4 supports */
EXT4_ATTR_FEATURE(lazy_itable_init);
@@ -256,6 +257,7 @@ static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(metadata_csum_seed),
NULL,
};
+ATTRIBUTE_GROUPS(ext4_feat);
static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi)
{
@@ -374,13 +376,13 @@ static const struct sysfs_ops ext4_attr_ops = {
};
static struct kobj_type ext4_sb_ktype = {
- .default_attrs = ext4_attrs,
+ .default_groups = ext4_groups,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_sb_release,
};
static struct kobj_type ext4_feat_ktype = {
- .default_attrs = ext4_feat_attrs,
+ .default_groups = ext4_feat_groups,
.sysfs_ops = &ext4_attr_ops,
.release = (void (*)(struct kobject *))kfree,
};
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ed70b68b2b38..a0eef95b9e0e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -146,8 +146,8 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
exist = f2fs_test_bit(offset, se->cur_valid_map);
if (!exist && type == DATA_GENERIC_ENHANCE) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "blkaddr:%u, sit bitmap:%d", blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+ blkaddr, exist);
set_sbi_flag(sbi, SBI_NEED_FSCK);
WARN_ON(1);
}
@@ -184,8 +184,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
case DATA_GENERIC_ENHANCE_READ:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi))) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "access invalid blkaddr:%u", blkaddr);
+ f2fs_warn(sbi, "access invalid blkaddr:%u",
+ blkaddr);
set_sbi_flag(sbi, SBI_NEED_FSCK);
WARN_ON(1);
return false;
@@ -657,9 +657,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
err_out:
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: orphan failed (ino=%x), run fsck to fix.",
- __func__, ino);
+ f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.",
+ __func__, ino);
return err;
}
@@ -676,13 +675,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
return 0;
if (bdev_read_only(sbi->sb->s_bdev)) {
- f2fs_msg(sbi->sb, KERN_INFO, "write access "
- "unavailable, skipping orphan cleanup");
+ f2fs_info(sbi, "write access unavailable, skipping orphan cleanup");
return 0;
}
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_info(sbi, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
@@ -827,26 +825,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
crc_offset > CP_CHKSUM_OFFSET) {
f2fs_put_page(*cp_page, 1);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "invalid crc_offset: %zu", crc_offset);
+ f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
- if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) {
- if (crc_offset != CP_MIN_CHKSUM_OFFSET) {
- f2fs_put_page(*cp_page, 1);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "layout of large_nat_bitmap is deprecated, "
- "run fsck to repair, chksum_offset: %zu",
- crc_offset);
- return -EINVAL;
- }
- }
-
crc = f2fs_checkpoint_chksum(sbi, *cp_block);
if (crc != cur_cp_crc(*cp_block)) {
f2fs_put_page(*cp_page, 1);
- f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
+ f2fs_warn(sbi, "invalid crc value");
return -EINVAL;
}
@@ -869,9 +855,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
sbi->blocks_per_seg) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "invalid cp_pack_total_block_count:%u",
- le32_to_cpu(cp_block->cp_pack_total_block_count));
+ f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
goto invalid_cp;
}
pre_version = *version;
@@ -905,6 +890,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned int cp_blks = 1 + __cp_payload(sbi);
block_t cp_blk_no;
int i;
+ int err;
sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
GFP_KERNEL);
@@ -932,6 +918,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
} else if (cp2) {
cur_page = cp2;
} else {
+ err = -EFSCORRUPTED;
goto fail_no_cp;
}
@@ -944,8 +931,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
sbi->cur_cp_pack = 2;
/* Sanity checking of checkpoint */
- if (f2fs_sanity_check_ckpt(sbi))
+ if (f2fs_sanity_check_ckpt(sbi)) {
+ err = -EFSCORRUPTED;
goto free_fail_no_cp;
+ }
if (cp_blks <= 1)
goto done;
@@ -959,8 +948,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
- if (IS_ERR(cur_page))
+ if (IS_ERR(cur_page)) {
+ err = PTR_ERR(cur_page);
goto free_fail_no_cp;
+ }
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
@@ -975,7 +966,7 @@ free_fail_no_cp:
f2fs_put_page(cp2, 1);
fail_no_cp:
kvfree(sbi->ckpt);
- return -EINVAL;
+ return err;
}
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
@@ -1142,17 +1133,24 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
static bool __need_flush_quota(struct f2fs_sb_info *sbi)
{
+ bool ret = false;
+
if (!is_journalled_quota(sbi))
return false;
- if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
- return false;
- if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
- return false;
- if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
- return true;
- if (get_pages(sbi, F2FS_DIRTY_QDATA))
- return true;
- return false;
+
+ down_write(&sbi->quota_sem);
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) {
+ clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ ret = true;
+ } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
+ ret = true;
+ }
+ up_write(&sbi->quota_sem);
+ return ret;
}
/*
@@ -1171,26 +1169,22 @@ static int block_operations(struct f2fs_sb_info *sbi)
blk_start_plug(&plug);
retry_flush_quotas:
+ f2fs_lock_all(sbi);
if (__need_flush_quota(sbi)) {
int locked;
if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
- f2fs_lock_all(sbi);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
goto retry_flush_dents;
}
- clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ f2fs_unlock_all(sbi);
/* only failed during mount/umount/freeze/quotactl */
locked = down_read_trylock(&sbi->sb->s_umount);
f2fs_quota_sync(sbi->sb, -1);
if (locked)
up_read(&sbi->sb->s_umount);
- }
-
- f2fs_lock_all(sbi);
- if (__need_flush_quota(sbi)) {
- f2fs_unlock_all(sbi);
cond_resched();
goto retry_flush_quotas;
}
@@ -1212,12 +1206,6 @@ retry_flush_dents:
*/
down_write(&sbi->node_change);
- if (__need_flush_quota(sbi)) {
- up_write(&sbi->node_change);
- f2fs_unlock_all(sbi);
- goto retry_flush_quotas;
- }
-
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
@@ -1313,7 +1301,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
+ is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
@@ -1328,10 +1317,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
- /*
- * TODO: we count on fsck.f2fs to clear this flag until we figure out
- * missing cases which clear it incorrectly.
- */
+ else
+ __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
@@ -1571,8 +1558,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (cpc->reason != CP_PAUSE)
return 0;
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Start checkpoint disabled!");
+ f2fs_warn(sbi, "Start checkpoint disabled!");
}
mutex_lock(&sbi->cp_mutex);
@@ -1638,8 +1624,7 @@ stop:
stat_inc_cp_count(sbi->stat_info);
if (cpc->reason & CP_RECOVERY)
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "checkpoint: version = %llx", ckpt_ver);
+ f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
f2fs_update_time(sbi, CP_TIME);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index e1cab1717ac7..4eb2f3920140 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -14,6 +14,7 @@
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
@@ -54,7 +55,7 @@ static bool __is_cp_guaranteed(struct page *page)
static enum count_type __read_io_type(struct page *page)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = page_file_mapping(page);
if (mapping) {
struct inode *inode = mapping->host;
@@ -185,7 +186,7 @@ static void f2fs_write_end_io(struct bio *bio)
continue;
}
- fscrypt_pullback_bio_page(&page, true);
+ fscrypt_finalize_bounce_page(&page);
if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
@@ -347,25 +348,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
io->bio = NULL;
}
-static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
+static bool __has_merged_page(struct bio *bio, struct inode *inode,
struct page *page, nid_t ino)
{
struct bio_vec *bvec;
struct page *target;
struct bvec_iter_all iter_all;
- if (!io->bio)
+ if (!bio)
return false;
if (!inode && !page && !ino)
return true;
- bio_for_each_segment_all(bvec, io->bio, iter_all) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
- if (bvec->bv_page->mapping)
- target = bvec->bv_page;
- else
- target = fscrypt_control_page(bvec->bv_page);
+ target = bvec->bv_page;
+ if (fscrypt_is_bounce_page(target))
+ target = fscrypt_pagecache_page(target);
if (inode && inode == target->mapping->host)
return true;
@@ -411,7 +411,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, page, ino);
+ ret = __has_merged_page(io->bio, inode, page, ino);
up_read(&io->io_rwsem);
}
if (ret)
@@ -455,7 +455,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
fio->is_por ? META_POR : (__is_meta_io(fio) ?
META_GENERIC : DATA_GENERIC_ENHANCE)))
- return -EFAULT;
+ return -EFSCORRUPTED;
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -481,6 +481,61 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
return 0;
}
+int f2fs_merge_page_bio(struct f2fs_io_info *fio)
+{
+ struct bio *bio = *fio->bio;
+ struct page *page = fio->encrypted_page ?
+ fio->encrypted_page : fio->page;
+
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFSCORRUPTED;
+
+ trace_f2fs_submit_page_bio(page, fio);
+ f2fs_trace_ios(fio, 0);
+
+ if (bio && (*fio->last_block + 1 != fio->new_blkaddr ||
+ !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) {
+ __submit_bio(fio->sbi, bio, fio->type);
+ bio = NULL;
+ }
+alloc_new:
+ if (!bio) {
+ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
+ BIO_MAX_PAGES, false, fio->type, fio->temp);
+ bio_set_op_attrs(bio, fio->op, fio->op_flags);
+ }
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ __submit_bio(fio->sbi, bio, fio->type);
+ bio = NULL;
+ goto alloc_new;
+ }
+
+ if (fio->io_wbc)
+ wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
+
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+
+ *fio->last_block = fio->new_blkaddr;
+ *fio->bio = bio;
+
+ return 0;
+}
+
+static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio,
+ struct page *page)
+{
+ if (!bio)
+ return;
+
+ if (!__has_merged_page(*bio, NULL, page, 0))
+ return;
+
+ __submit_bio(sbi, *bio, DATA);
+ *bio = NULL;
+}
+
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
@@ -734,7 +789,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
dn.data_blkaddr = ei.blk + index - ei.fofs;
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto put_err;
}
goto got_it;
@@ -754,7 +809,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
dn.data_blkaddr,
DATA_GENERIC_ENHANCE)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto put_err;
}
got_it:
@@ -1100,7 +1155,7 @@ next_block:
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto sync_out;
}
@@ -1530,7 +1585,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
sector_t block_nr;
int ret = 0;
- block_in_file = (sector_t)page->index;
+ block_in_file = (sector_t)page_index(page);
last_block = block_in_file + nr_pages;
last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
blkbits;
@@ -1563,14 +1618,15 @@ got_it:
block_nr = map->m_pblk + block_in_file - map->m_lblk;
SetPageMappedToDisk(page);
- if (!PageUptodate(page) && !cleancache_get_page(page)) {
+ if (!PageUptodate(page) && (!PageSwapCache(page) &&
+ !cleancache_get_page(page))) {
SetPageUptodate(page);
goto confused;
}
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC_ENHANCE_READ)) {
- ret = -EFAULT;
+ ret = -EFSCORRUPTED;
goto out;
}
} else {
@@ -1661,7 +1717,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
prefetchw(&page->flags);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
- page->index,
+ page_index(page),
readahead_gfp_mask(mapping)))
goto next_page;
}
@@ -1685,7 +1741,7 @@ next_page:
static int f2fs_read_data_page(struct file *file, struct page *page)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
int ret = -EAGAIN;
trace_f2fs_readpage(page, DATA);
@@ -1694,7 +1750,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
+ ret = f2fs_mpage_readpages(page_file_mapping(page),
+ NULL, page, 1, false);
return ret;
}
@@ -1727,8 +1784,9 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
retry_encrypt:
- fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
- PAGE_SIZE, 0, fio->page->index, gfp_flags);
+ fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page,
+ PAGE_SIZE, 0,
+ gfp_flags);
if (IS_ERR(fio->encrypted_page)) {
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
@@ -1851,7 +1909,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC_ENHANCE))
- return -EFAULT;
+ return -EFSCORRUPTED;
ipu_force = true;
fio->need_lock = LOCK_DONE;
@@ -1878,7 +1936,7 @@ got_it:
if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC_ENHANCE)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto out_writepage;
}
/*
@@ -1900,8 +1958,7 @@ got_it:
err = f2fs_inplace_write_data(fio);
if (err) {
if (f2fs_encrypted_file(inode))
- fscrypt_pullback_bio_page(&fio->encrypted_page,
- true);
+ fscrypt_finalize_bounce_page(&fio->encrypted_page);
if (PageWriteback(page))
end_page_writeback(page);
} else {
@@ -1947,6 +2004,8 @@ out:
}
static int __write_data_page(struct page *page, bool *submitted,
+ struct bio **bio,
+ sector_t *last_block,
struct writeback_control *wbc,
enum iostat_type io_type)
{
@@ -1972,6 +2031,8 @@ static int __write_data_page(struct page *page, bool *submitted,
.need_lock = LOCK_RETRY,
.io_type = io_type,
.io_wbc = wbc,
+ .bio = bio,
+ .last_block = last_block,
};
trace_f2fs_writepage(page, DATA);
@@ -2070,10 +2131,13 @@ out:
unlock_page(page);
if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
- !F2FS_I(inode)->cp_task)
+ !F2FS_I(inode)->cp_task) {
+ f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_balance_fs(sbi, need_balance_fs);
+ }
if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_submit_merged_write(sbi, DATA);
submitted = NULL;
}
@@ -2100,7 +2164,7 @@ redirty_out:
static int f2fs_write_data_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_data_page(page, NULL, wbc, FS_DATA_IO);
+ return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO);
}
/*
@@ -2116,6 +2180,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int done = 0;
struct pagevec pvec;
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ struct bio *bio = NULL;
+ sector_t last_block;
int nr_pages;
pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
@@ -2192,17 +2258,20 @@ continue_unlock:
}
if (PageWriteback(page)) {
- if (wbc->sync_mode != WB_SYNC_NONE)
+ if (wbc->sync_mode != WB_SYNC_NONE) {
f2fs_wait_on_page_writeback(page,
DATA, true, true);
- else
+ f2fs_submit_ipu_bio(sbi, &bio, page);
+ } else {
goto continue_unlock;
+ }
}
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = __write_data_page(page, &submitted, wbc, io_type);
+ ret = __write_data_page(page, &submitted, &bio,
+ &last_block, wbc, io_type);
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
@@ -2251,6 +2320,9 @@ continue_unlock:
if (nwritten)
f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
NULL, 0, DATA);
+ /* submit cached bio of IPU write */
+ if (bio)
+ __submit_bio(sbi, bio, DATA);
return ret;
}
@@ -2262,6 +2334,9 @@ static inline bool __should_serialize_io(struct inode *inode,
return false;
if (IS_NOQUOTA(inode))
return false;
+ /* to avoid deadlock in path of data flush */
+ if (F2FS_I(inode)->cp_task)
+ return false;
if (wbc->sync_mode != WB_SYNC_ALL)
return true;
if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
@@ -2533,7 +2608,7 @@ repeat:
} else {
if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto fail;
}
err = f2fs_submit_page_read(inode, page, blkaddr);
@@ -2778,13 +2853,14 @@ int f2fs_release_page(struct page *page, gfp_t wait)
static int f2fs_set_data_page_dirty(struct page *page)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
trace_f2fs_set_page_dirty(page, DATA);
if (!PageUptodate(page))
SetPageUptodate(page);
+ if (PageSwapCache(page))
+ return __set_page_dirty_nobuffers(page);
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
@@ -2876,6 +2952,126 @@ int f2fs_migrate_page(struct address_space *mapping,
}
#endif
+#ifdef CONFIG_SWAP
+/* Copied from generic_swapfile_activate() to check any holes */
+static int check_swap_activate(struct file *swap_file, unsigned int max)
+{
+ struct address_space *mapping = swap_file->f_mapping;
+ struct inode *inode = mapping->host;
+ unsigned blocks_per_page;
+ unsigned long page_no;
+ unsigned blkbits;
+ sector_t probe_block;
+ sector_t last_block;
+ sector_t lowest_block = -1;
+ sector_t highest_block = 0;
+
+ blkbits = inode->i_blkbits;
+ blocks_per_page = PAGE_SIZE >> blkbits;
+
+ /*
+ * Map all the blocks into the extent list. This code doesn't try
+ * to be very smart.
+ */
+ probe_block = 0;
+ page_no = 0;
+ last_block = i_size_read(inode) >> blkbits;
+ while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
+ unsigned block_in_page;
+ sector_t first_block;
+
+ cond_resched();
+
+ first_block = bmap(inode, probe_block);
+ if (first_block == 0)
+ goto bad_bmap;
+
+ /*
+ * It must be PAGE_SIZE aligned on-disk
+ */
+ if (first_block & (blocks_per_page - 1)) {
+ probe_block++;
+ goto reprobe;
+ }
+
+ for (block_in_page = 1; block_in_page < blocks_per_page;
+ block_in_page++) {
+ sector_t block;
+
+ block = bmap(inode, probe_block + block_in_page);
+ if (block == 0)
+ goto bad_bmap;
+ if (block != first_block + block_in_page) {
+ /* Discontiguity */
+ probe_block++;
+ goto reprobe;
+ }
+ }
+
+ first_block >>= (PAGE_SHIFT - blkbits);
+ if (page_no) { /* exclude the header page */
+ if (first_block < lowest_block)
+ lowest_block = first_block;
+ if (first_block > highest_block)
+ highest_block = first_block;
+ }
+
+ page_no++;
+ probe_block += blocks_per_page;
+reprobe:
+ continue;
+ }
+ return 0;
+
+bad_bmap:
+ pr_err("swapon: swapfile has holes\n");
+ return -EINVAL;
+}
+
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ struct inode *inode = file_inode(file);
+ int ret;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ return -EROFS;
+
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+
+ ret = check_swap_activate(file, sis->max);
+ if (ret)
+ return ret;
+
+ set_inode_flag(inode, FI_PIN_FILE);
+ f2fs_precache_extents(inode);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ return 0;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+ struct inode *inode = file_inode(file);
+
+ clear_inode_flag(inode, FI_PIN_FILE);
+}
+#else
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ return -EOPNOTSUPP;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+}
+#endif
+
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
.readpages = f2fs_read_data_pages,
@@ -2888,6 +3084,8 @@ const struct address_space_operations f2fs_dblock_aops = {
.releasepage = f2fs_release_page,
.direct_IO = f2fs_direct_IO,
.bmap = f2fs_bmap,
+ .swap_activate = f2fs_swap_activate,
+ .swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
.migratepage = f2fs_migrate_page,
#endif
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 99e9a5c37b71..7706049d23bf 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -27,8 +27,15 @@ static DEFINE_MUTEX(f2fs_stat_mutex);
static void update_general_status(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
int i;
+ /* these will be changed if online resize is done */
+ si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
+ si->main_area_sections = le32_to_cpu(raw_super->section_count);
+ si->main_area_zones = si->main_area_sections /
+ le32_to_cpu(raw_super->secs_per_zone);
+
/* validation check of the segment numbers */
si->hit_largest = atomic64_read(&sbi->read_hit_largest);
si->hit_cached = atomic64_read(&sbi->read_hit_cached);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 59bc46017855..85a1528f319f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -218,9 +218,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
- f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
- "Corrupted max_depth of %lu: %u",
- dir->i_ino, max_depth);
+ f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u",
+ dir->i_ino, max_depth);
max_depth = MAX_DIR_HASH_DEPTH;
f2fs_i_depth_write(dir, max_depth);
}
@@ -816,11 +815,10 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
if (unlikely(bit_pos > d->max ||
le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted namelen=%d, run fsck to fix.",
- __func__, le16_to_cpu(de->name_len));
+ f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.",
+ __func__, le16_to_cpu(de->name_len));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ err = -EFSCORRUPTED;
goto out;
}
@@ -828,8 +826,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
int save_len = fstr->len;
err = fscrypt_fname_disk_to_usr(d->inode,
- (u32)de->hash_code, 0,
- &de_name, fstr);
+ (u32)le32_to_cpu(de->hash_code),
+ 0, &de_name, fstr);
if (err)
goto out;
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index caf77fe8ac07..e60078460ad1 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -184,10 +184,9 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
next_re = rb_entry(next, struct rb_entry, rb_node);
if (cur_re->ofs + cur_re->len > next_re->ofs) {
- f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
- "cur(%u, %u) next(%u, %u)",
- cur_re->ofs, cur_re->len,
- next_re->ofs, next_re->len);
+ f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
+ cur_re->ofs, cur_re->len,
+ next_re->ofs, next_re->len);
return false;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 06b89a9862ab..17382da7f0bd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -136,6 +136,9 @@ struct f2fs_mount_info {
int alloc_mode; /* segment allocation policy */
int fsync_mode; /* fsync policy */
bool test_dummy_encryption; /* test dummy encryption */
+ block_t unusable_cap; /* Amount of space allowed to be
+ * unusable when disabling checkpoint
+ */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
@@ -412,6 +415,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32)
#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32)
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
+#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64)
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
@@ -476,8 +480,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode);
#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
BITS_PER_BYTE + 1))
-#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \
- BITS_PER_BYTE - 1) / BITS_PER_BYTE)
+#define INLINE_DENTRY_BITMAP_SIZE(inode) \
+ DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE)
#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
NR_INLINE_DENTRY(inode) + \
@@ -1052,6 +1056,8 @@ struct f2fs_io_info {
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
+ struct bio **bio; /* bio for ipu */
+ sector_t *last_block; /* last block number in bio */
unsigned char version; /* version of the node */
};
@@ -1111,6 +1117,7 @@ enum {
SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
+ SBI_IS_RESIZEFS, /* resizefs is in process */
};
enum {
@@ -1207,6 +1214,7 @@ struct f2fs_sb_info {
/* for inode management */
struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */
spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */
+ struct mutex flush_lock; /* for flush exclusion */
/* for extent tree cache */
struct radix_tree_root extent_tree_root;/* cache extent cache entries */
@@ -1230,6 +1238,7 @@ struct f2fs_sb_info {
unsigned int segs_per_sec; /* segments per section */
unsigned int secs_per_zone; /* sections per zone */
unsigned int total_sections; /* total section count */
+ struct mutex resize_mutex; /* for resize exclusion */
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
@@ -1247,6 +1256,7 @@ struct f2fs_sb_info {
block_t unusable_block_count; /* # of blocks saved by last cp */
unsigned int nquota_files; /* # of quota sysfile */
+ struct rw_semaphore quota_sem; /* blocking cp for flags */
/* # of pages, see count_type */
atomic_t nr_pages[NR_COUNT_TYPE];
@@ -1488,7 +1498,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
{
- return F2FS_M_SB(page->mapping);
+ return F2FS_M_SB(page_file_mapping(page));
}
static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
@@ -1766,8 +1776,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
- if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
- avail_user_block_count -= sbi->unusable_block_count;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (avail_user_block_count > sbi->unusable_block_count)
+ avail_user_block_count -= sbi->unusable_block_count;
+ else
+ avail_user_block_count = 0;
+ }
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
@@ -1795,7 +1809,20 @@ enospc:
return -ENOSPC;
}
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+__printf(2, 3)
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...);
+
+#define f2fs_err(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__)
+#define f2fs_warn(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__)
+#define f2fs_notice(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__)
+#define f2fs_info(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__)
+#define f2fs_debug(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__)
+
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
block_t count)
@@ -1811,11 +1838,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
if (unlikely(inode->i_blocks < sectors)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
- inode->i_ino,
- (unsigned long long)inode->i_blocks,
- (unsigned long long)sectors);
+ f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks,
+ (unsigned long long)sectors);
set_sbi_flag(sbi, SBI_NEED_FSCK);
return;
}
@@ -1967,7 +1993,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode, bool is_inode)
{
block_t valid_block_count;
- unsigned int valid_node_count;
+ unsigned int valid_node_count, user_block_count;
int err;
if (is_inode) {
@@ -1994,10 +2020,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+ user_block_count = sbi->user_block_count;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
- valid_block_count += sbi->unusable_block_count;
+ user_block_count -= sbi->unusable_block_count;
- if (unlikely(valid_block_count > sbi->user_block_count)) {
+ if (unlikely(valid_block_count > user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
@@ -2052,10 +2079,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
dquot_free_inode(inode);
} else {
if (unlikely(inode->i_blocks == 0)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
- inode->i_ino,
- (unsigned long long)inode->i_blocks);
+ f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
return;
}
@@ -2191,6 +2217,9 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
{
+ if (sbi->gc_mode == GC_URGENT)
+ return true;
+
if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
get_pages(sbi, F2FS_WB_CP_DATA) ||
@@ -2198,7 +2227,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
get_pages(sbi, F2FS_DIO_WRITE))
return false;
- if (SM_I(sbi) && SM_I(sbi)->dcc_info &&
+ if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info &&
atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
return false;
@@ -2320,57 +2349,23 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
}
/*
- * Inode flags
+ * On-disk inode flags (f2fs_inode::i_flags)
*/
-#define F2FS_SECRM_FL 0x00000001 /* Secure deletion */
-#define F2FS_UNRM_FL 0x00000002 /* Undelete */
-#define F2FS_COMPR_FL 0x00000004 /* Compress file */
#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */
#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */
#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */
#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */
-/* Reserved for compression usage... */
-#define F2FS_DIRTY_FL 0x00000100
-#define F2FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
-#define F2FS_NOCOMPR_FL 0x00000400 /* Don't compress */
-#define F2FS_ENCRYPT_FL 0x00000800 /* encrypted file */
-/* End compression flags --- maybe not all used */
#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */
-#define F2FS_IMAGIC_FL 0x00002000 /* AFS directory */
-#define F2FS_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
-#define F2FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
-#define F2FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
-#define F2FS_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
-#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */
-#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
-#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
-#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */
-#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
-#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-
-#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */
-#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
-
-/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */
-#define F2FS_FL_XFLAG_VISIBLE (F2FS_SYNC_FL | \
- F2FS_IMMUTABLE_FL | \
- F2FS_APPEND_FL | \
- F2FS_NODUMP_FL | \
- F2FS_NOATIME_FL | \
- F2FS_PROJINHERIT_FL)
/* Flags that should be inherited by new inodes from their parent. */
-#define F2FS_FL_INHERITED (F2FS_SECRM_FL | F2FS_UNRM_FL | F2FS_COMPR_FL |\
- F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL |\
- F2FS_NOCOMPR_FL | F2FS_JOURNAL_DATA_FL |\
- F2FS_NOTAIL_FL | F2FS_DIRSYNC_FL |\
- F2FS_PROJINHERIT_FL)
+#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
+ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_TOPDIR_FL))
+#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL)
@@ -2856,9 +2851,8 @@ static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "invalid blkaddr: %u, type: %d, run fsck to fix.",
- blkaddr, type);
+ f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
f2fs_bug_on(sbi, 1);
}
}
@@ -2989,8 +2983,6 @@ int f2fs_quota_sync(struct super_block *sb, int type);
void f2fs_quota_off_umount(struct super_block *sb);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
-extern __printf(3, 4)
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
@@ -3074,9 +3066,12 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
-int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3169,6 +3164,7 @@ void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
nid_t ino, enum page_type type);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
+int f2fs_merge_page_bio(struct f2fs_io_info *fio);
void f2fs_submit_page_write(struct f2fs_io_info *fio);
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio);
@@ -3214,6 +3210,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
/*
* recovery.c
@@ -3686,7 +3683,8 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
if (test_opt(sbi, LFS) && (rw == WRITE) &&
block_unaligned_IO(inode, iocb, iter))
return true;
- if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
+ if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
+ !(inode->i_flags & S_SWAPFILE))
return true;
return false;
@@ -3712,4 +3710,7 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
return false;
}
+#define EFSBADCRC EBADMSG /* Bad CRC detected */
+#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
+
#endif /* _LINUX_F2FS_H */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 45b45f37d347..f8d46df8fa9e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -707,11 +707,9 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
}
- flags = fi->i_flags & F2FS_FL_USER_VISIBLE;
+ flags = fi->i_flags;
if (flags & F2FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
- if (flags & F2FS_COMPR_FL)
- stat->attributes |= STATX_ATTR_COMPRESSED;
if (IS_ENCRYPTED(inode))
stat->attributes |= STATX_ATTR_ENCRYPTED;
if (flags & F2FS_IMMUTABLE_FL)
@@ -720,7 +718,6 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
stat->attributes |= STATX_ATTR_NODUMP;
stat->attributes_mask |= (STATX_ATTR_APPEND |
- STATX_ATTR_COMPRESSED |
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
@@ -1026,7 +1023,7 @@ next_dnode:
!f2fs_is_valid_blkaddr(sbi, *blkaddr,
DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
- return -EFAULT;
+ return -EFSCORRUPTED;
}
if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
@@ -1214,7 +1211,7 @@ roll_back:
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
pgoff_t start = offset >> PAGE_SHIFT;
pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
@@ -1467,7 +1464,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
- idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1531,7 +1528,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (off_end)
map.m_len++;
- err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+ if (f2fs_is_pinned_file(inode))
+ map.m_seg_type = CURSEG_COLD_DATA;
+
+ err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ?
+ F2FS_GET_BLOCK_PRE_DIO :
+ F2FS_GET_BLOCK_PRE_AIO));
if (err) {
pgoff_t last_off;
@@ -1648,44 +1650,22 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id)
return 0;
}
-static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
+static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
{
- struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int flags = fi->i_flags;
-
- if (IS_ENCRYPTED(inode))
- flags |= F2FS_ENCRYPT_FL;
- if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
- flags |= F2FS_INLINE_DATA_FL;
- if (is_inode_flag_set(inode, FI_PIN_FILE))
- flags |= F2FS_NOCOW_FL;
-
- flags &= F2FS_FL_USER_VISIBLE;
-
- return put_user(flags, (int __user *)arg);
-}
-
-static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags)
-{
- struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int oldflags;
+ u32 oldflags;
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
return -EPERM;
- flags = f2fs_mask_flags(inode->i_mode, flags);
-
oldflags = fi->i_flags;
- if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL))
+ if ((iflags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL))
if (!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
- flags = flags & F2FS_FL_USER_MODIFIABLE;
- flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE;
- fi->i_flags = flags;
+ fi->i_flags = iflags | (oldflags & ~mask);
if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
@@ -1698,26 +1678,124 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags)
return 0;
}
+/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */
+
+/*
+ * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
+ * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
+ * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add
+ * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
+ */
+
+static const struct {
+ u32 iflag;
+ u32 fsflag;
+} f2fs_fsflags_map[] = {
+ { F2FS_SYNC_FL, FS_SYNC_FL },
+ { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL },
+ { F2FS_APPEND_FL, FS_APPEND_FL },
+ { F2FS_NODUMP_FL, FS_NODUMP_FL },
+ { F2FS_NOATIME_FL, FS_NOATIME_FL },
+ { F2FS_INDEX_FL, FS_INDEX_FL },
+ { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
+ { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
+};
+
+#define F2FS_GETTABLE_FS_FL ( \
+ FS_SYNC_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_APPEND_FL | \
+ FS_NODUMP_FL | \
+ FS_NOATIME_FL | \
+ FS_INDEX_FL | \
+ FS_DIRSYNC_FL | \
+ FS_PROJINHERIT_FL | \
+ FS_ENCRYPT_FL | \
+ FS_INLINE_DATA_FL | \
+ FS_NOCOW_FL)
+
+#define F2FS_SETTABLE_FS_FL ( \
+ FS_SYNC_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_APPEND_FL | \
+ FS_NODUMP_FL | \
+ FS_NOATIME_FL | \
+ FS_DIRSYNC_FL | \
+ FS_PROJINHERIT_FL)
+
+/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
+static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
+{
+ u32 fsflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
+ if (iflags & f2fs_fsflags_map[i].iflag)
+ fsflags |= f2fs_fsflags_map[i].fsflag;
+
+ return fsflags;
+}
+
+/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
+static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
+{
+ u32 iflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
+ if (fsflags & f2fs_fsflags_map[i].fsflag)
+ iflags |= f2fs_fsflags_map[i].iflag;
+
+ return iflags;
+}
+
+static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
+
+ if (IS_ENCRYPTED(inode))
+ fsflags |= FS_ENCRYPT_FL;
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
+ fsflags |= FS_INLINE_DATA_FL;
+ if (is_inode_flag_set(inode, FI_PIN_FILE))
+ fsflags |= FS_NOCOW_FL;
+
+ fsflags &= F2FS_GETTABLE_FS_FL;
+
+ return put_user(fsflags, (int __user *)arg);
+}
+
static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- unsigned int flags;
+ u32 fsflags;
+ u32 iflags;
int ret;
if (!inode_owner_or_capable(inode))
return -EACCES;
- if (get_user(flags, (int __user *)arg))
+ if (get_user(fsflags, (int __user *)arg))
return -EFAULT;
+ if (fsflags & ~F2FS_GETTABLE_FS_FL)
+ return -EOPNOTSUPP;
+ fsflags &= F2FS_SETTABLE_FS_FL;
+
+ iflags = f2fs_fsflags_to_iflags(fsflags);
+ if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
+ return -EOPNOTSUPP;
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
inode_lock(inode);
- ret = __f2fs_ioc_setflags(inode, flags);
-
+ ret = f2fs_setflags_common(inode, iflags,
+ f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL));
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1764,9 +1842,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
* f2fs_is_atomic_file.
*/
if (get_dirty_pages(inode))
- f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
- "Unexpected flush for atomic writes: ino=%lu, npages=%u",
- inode->i_ino, get_dirty_pages(inode));
+ f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u",
+ inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret) {
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2201,8 +2278,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
return -EROFS;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Skipping Checkpoint. Checkpoints currently disabled.");
+ f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
return -EINVAL;
}
@@ -2291,7 +2367,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
if (!fragmented)
goto out;
- sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
+ sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
/*
* make sure there are enough free section for LFS allocation, this can
@@ -2587,10 +2663,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
__is_large_section(sbi)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Can't flush %u in %d for segs_per_sec %u != 1",
- range.dev_num, sbi->s_ndevs,
- sbi->segs_per_sec);
+ f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1",
+ range.dev_num, sbi->s_ndevs, sbi->segs_per_sec);
return -EINVAL;
}
@@ -2727,47 +2801,56 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
}
#endif
-/* Transfer internal flags to xflags */
-static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags)
-{
- __u32 xflags = 0;
-
- if (iflags & F2FS_SYNC_FL)
- xflags |= FS_XFLAG_SYNC;
- if (iflags & F2FS_IMMUTABLE_FL)
- xflags |= FS_XFLAG_IMMUTABLE;
- if (iflags & F2FS_APPEND_FL)
- xflags |= FS_XFLAG_APPEND;
- if (iflags & F2FS_NODUMP_FL)
- xflags |= FS_XFLAG_NODUMP;
- if (iflags & F2FS_NOATIME_FL)
- xflags |= FS_XFLAG_NOATIME;
- if (iflags & F2FS_PROJINHERIT_FL)
- xflags |= FS_XFLAG_PROJINHERIT;
+/* FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR support */
+
+/*
+ * To make a new on-disk f2fs i_flag gettable via FS_IOC_FSGETXATTR and settable
+ * via FS_IOC_FSSETXATTR, add an entry for it to f2fs_xflags_map[], and add its
+ * FS_XFLAG_* equivalent to F2FS_SUPPORTED_XFLAGS.
+ */
+
+static const struct {
+ u32 iflag;
+ u32 xflag;
+} f2fs_xflags_map[] = {
+ { F2FS_SYNC_FL, FS_XFLAG_SYNC },
+ { F2FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE },
+ { F2FS_APPEND_FL, FS_XFLAG_APPEND },
+ { F2FS_NODUMP_FL, FS_XFLAG_NODUMP },
+ { F2FS_NOATIME_FL, FS_XFLAG_NOATIME },
+ { F2FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT },
+};
+
+#define F2FS_SUPPORTED_XFLAGS ( \
+ FS_XFLAG_SYNC | \
+ FS_XFLAG_IMMUTABLE | \
+ FS_XFLAG_APPEND | \
+ FS_XFLAG_NODUMP | \
+ FS_XFLAG_NOATIME | \
+ FS_XFLAG_PROJINHERIT)
+
+/* Convert f2fs on-disk i_flags to FS_IOC_FS{GET,SET}XATTR flags */
+static inline u32 f2fs_iflags_to_xflags(u32 iflags)
+{
+ u32 xflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++)
+ if (iflags & f2fs_xflags_map[i].iflag)
+ xflags |= f2fs_xflags_map[i].xflag;
+
return xflags;
}
-#define F2FS_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
- FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
- FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
-
-/* Transfer xflags flags to internal */
-static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags)
+/* Convert FS_IOC_FS{GET,SET}XATTR flags to f2fs on-disk i_flags */
+static inline u32 f2fs_xflags_to_iflags(u32 xflags)
{
- unsigned long iflags = 0;
+ u32 iflags = 0;
+ int i;
- if (xflags & FS_XFLAG_SYNC)
- iflags |= F2FS_SYNC_FL;
- if (xflags & FS_XFLAG_IMMUTABLE)
- iflags |= F2FS_IMMUTABLE_FL;
- if (xflags & FS_XFLAG_APPEND)
- iflags |= F2FS_APPEND_FL;
- if (xflags & FS_XFLAG_NODUMP)
- iflags |= F2FS_NODUMP_FL;
- if (xflags & FS_XFLAG_NOATIME)
- iflags |= F2FS_NOATIME_FL;
- if (xflags & FS_XFLAG_PROJINHERIT)
- iflags |= F2FS_PROJINHERIT_FL;
+ for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++)
+ if (xflags & f2fs_xflags_map[i].xflag)
+ iflags |= f2fs_xflags_map[i].iflag;
return iflags;
}
@@ -2779,8 +2862,7 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg)
struct fsxattr fa;
memset(&fa, 0, sizeof(struct fsxattr));
- fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags &
- F2FS_FL_USER_VISIBLE);
+ fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags);
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
@@ -2818,9 +2900,8 @@ static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct fsxattr fa;
- unsigned int flags;
+ u32 iflags;
int err;
if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa)))
@@ -2830,11 +2911,11 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
if (!inode_owner_or_capable(inode))
return -EACCES;
- if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS)
+ if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS)
return -EOPNOTSUPP;
- flags = f2fs_xflags_to_iflags(fa.fsx_xflags);
- if (f2fs_mask_flags(inode->i_mode, flags) != flags)
+ iflags = f2fs_xflags_to_iflags(fa.fsx_xflags);
+ if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
return -EOPNOTSUPP;
err = mnt_want_write_file(filp);
@@ -2845,9 +2926,8 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
err = f2fs_ioctl_check_project(inode, &fa);
if (err)
goto out;
- flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) |
- (flags & F2FS_FL_XFLAG_VISIBLE);
- err = __f2fs_ioc_setflags(inode, flags);
+ err = f2fs_setflags_common(inode, iflags,
+ f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS));
if (err)
goto out;
@@ -2869,10 +2949,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
fi->i_gc_failures[GC_FAILURE_PIN] + 1);
if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: Enable GC = ino %lx after %x GC trials",
- __func__, inode->i_ino,
- fi->i_gc_failures[GC_FAILURE_PIN]);
+ f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
+ __func__, inode->i_ino,
+ fi->i_gc_failures[GC_FAILURE_PIN]);
clear_inode_flag(inode, FI_PIN_FILE);
return -EAGAIN;
}
@@ -2885,9 +2964,6 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
__u32 pin;
int ret = 0;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (get_user(pin, (__u32 __user *)arg))
return -EFAULT;
@@ -2980,6 +3056,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
return f2fs_precache_extents(file_inode(filp));
}
+static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+ __u64 block_count;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ if (copy_from_user(&block_count, (void __user *)arg,
+ sizeof(block_count)))
+ return -EFAULT;
+
+ ret = f2fs_resize_fs(sbi, block_count);
+
+ return ret;
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3036,6 +3133,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_set_pin_file(filp, arg);
case F2FS_IOC_PRECACHE_EXTENTS:
return f2fs_ioc_precache_extents(filp, arg);
+ case F2FS_IOC_RESIZE_FS:
+ return f2fs_ioc_resize_fs(filp, arg);
default:
return -ENOTTY;
}
@@ -3149,6 +3248,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GET_PIN_FILE:
case F2FS_IOC_SET_PIN_FILE:
case F2FS_IOC_PRECACHE_EXTENTS:
+ case F2FS_IOC_RESIZE_FS:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 963fb4571fd9..6691f526fa40 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -311,10 +311,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
- unsigned int last_segment = MAIN_SEGS(sbi);
+ unsigned int last_segment;
unsigned int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
+ last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
p.alloc_mode = alloc_mode;
select_policy(sbi, gc_type, type, &p);
@@ -387,7 +388,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
goto next;
/* Don't touch checkpointed data */
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
- get_ckpt_valid_blocks(sbi, segno)))
+ get_ckpt_valid_blocks(sbi, segno) &&
+ p.alloc_mode != SSR))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
@@ -404,7 +406,8 @@ next:
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sm->last_victim[p.gc_mode] = segno + 1;
- sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
+ sm->last_victim[p.gc_mode] %=
+ (MAIN_SECS(sbi) * sbi->segs_per_sec);
break;
}
}
@@ -615,9 +618,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
}
if (sum->version != dni->version) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: valid data with mismatched node version.",
- __func__);
+ f2fs_warn(sbi, "%s: valid data with mismatched node version.",
+ __func__);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
@@ -658,7 +660,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
dn.data_blkaddr = ei.blk + index - ei.fofs;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ))) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto put_page;
}
goto got_it;
@@ -676,7 +678,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
}
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE))) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto put_page;
}
got_it:
@@ -1180,9 +1182,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
sum = page_address(sum_page);
if (type != GET_SUM_TYPE((&sum->footer))) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
- "type [%d, %d] in SSA and SIT",
- segno, type, GET_SUM_TYPE((&sum->footer)));
+ f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
+ segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_stop_checkpoint(sbi, false);
goto skip;
@@ -1360,3 +1361,176 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
+
+static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
+ unsigned int end)
+{
+ int type;
+ unsigned int segno, next_inuse;
+ int err = 0;
+
+ /* Move out cursegs from the target range */
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
+ allocate_segment_for_resize(sbi, type, start, end);
+
+ /* do GC to move out valid blocks in the range */
+ for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
+ };
+
+ mutex_lock(&sbi->gc_mutex);
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC);
+ mutex_unlock(&sbi->gc_mutex);
+ put_gc_inode(&gc_list);
+
+ if (get_valid_blocks(sbi, segno, true))
+ return -EAGAIN;
+ }
+
+ err = f2fs_sync_fs(sbi->sb, 1);
+ if (err)
+ return err;
+
+ next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
+ if (next_inuse <= end) {
+ f2fs_err(sbi, "segno %u should be free but still inuse!",
+ next_inuse);
+ f2fs_bug_on(sbi, 1);
+ }
+ return err;
+}
+
+static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
+{
+ struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
+ int section_count = le32_to_cpu(raw_sb->section_count);
+ int segment_count = le32_to_cpu(raw_sb->segment_count);
+ int segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
+ long long block_count = le64_to_cpu(raw_sb->block_count);
+ int segs = secs * sbi->segs_per_sec;
+
+ raw_sb->section_count = cpu_to_le32(section_count + secs);
+ raw_sb->segment_count = cpu_to_le32(segment_count + segs);
+ raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
+ raw_sb->block_count = cpu_to_le64(block_count +
+ (long long)segs * sbi->blocks_per_seg);
+}
+
+static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
+{
+ int segs = secs * sbi->segs_per_sec;
+ long long user_block_count =
+ le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
+
+ SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
+ MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
+ FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
+ FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
+ F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count +
+ (long long)segs * sbi->blocks_per_seg);
+}
+
+int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
+{
+ __u64 old_block_count, shrunk_blocks;
+ unsigned int secs;
+ int gc_mode, gc_type;
+ int err = 0;
+ __u32 rem;
+
+ old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
+ if (block_count > old_block_count)
+ return -EINVAL;
+
+ /* new fs size should align to section size */
+ div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem);
+ if (rem)
+ return -EINVAL;
+
+ if (block_count == old_block_count)
+ return 0;
+
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
+ f2fs_err(sbi, "Should run fsck to repair first.");
+ return -EFSCORRUPTED;
+ }
+
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ f2fs_err(sbi, "Checkpoint should be enabled.");
+ return -EINVAL;
+ }
+
+ freeze_bdev(sbi->sb->s_bdev);
+
+ shrunk_blocks = old_block_count - block_count;
+ secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
+ spin_lock(&sbi->stat_lock);
+ if (shrunk_blocks + valid_user_blocks(sbi) +
+ sbi->current_reserved_blocks + sbi->unusable_block_count +
+ F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
+ err = -ENOSPC;
+ else
+ sbi->user_block_count -= shrunk_blocks;
+ spin_unlock(&sbi->stat_lock);
+ if (err) {
+ thaw_bdev(sbi->sb->s_bdev, sbi->sb);
+ return err;
+ }
+
+ mutex_lock(&sbi->resize_mutex);
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS);
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+
+ MAIN_SECS(sbi) -= secs;
+
+ for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
+ if (SIT_I(sbi)->last_victim[gc_mode] >=
+ MAIN_SECS(sbi) * sbi->segs_per_sec)
+ SIT_I(sbi)->last_victim[gc_mode] = 0;
+
+ for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
+ if (sbi->next_victim_seg[gc_type] >=
+ MAIN_SECS(sbi) * sbi->segs_per_sec)
+ sbi->next_victim_seg[gc_type] = NULL_SEGNO;
+
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+
+ err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec,
+ MAIN_SEGS(sbi) - 1);
+ if (err)
+ goto out;
+
+ update_sb_metadata(sbi, -secs);
+
+ err = f2fs_commit_super(sbi, false);
+ if (err) {
+ update_sb_metadata(sbi, secs);
+ goto out;
+ }
+
+ update_fs_metadata(sbi, -secs);
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ err = f2fs_sync_fs(sbi->sb, 1);
+ if (err) {
+ update_fs_metadata(sbi, secs);
+ update_sb_metadata(sbi, secs);
+ f2fs_commit_super(sbi, false);
+ }
+out:
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
+
+ MAIN_SECS(sbi) += secs;
+ spin_lock(&sbi->stat_lock);
+ sbi->user_block_count += shrunk_blocks;
+ spin_unlock(&sbi->stat_lock);
+ }
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ mutex_unlock(&sbi->resize_mutex);
+ thaw_bdev(sbi->sb->s_bdev, sbi->sb);
+ return err;
+}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 404d2462a0fe..3613efca8c00 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -140,11 +140,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(dn);
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
- f2fs_msg(fio.sbi->sb, KERN_WARNING,
- "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
- "run fsck to fix.",
- __func__, dn->inode->i_ino, dn->data_blkaddr);
- return -EINVAL;
+ f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+ __func__, dn->inode->i_ino, dn->data_blkaddr);
+ return -EFSCORRUPTED;
}
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
@@ -383,11 +381,9 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(&dn);
set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
- f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
- "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
- "run fsck to fix.",
- __func__, dir->i_ino, dn.data_blkaddr);
- err = -EINVAL;
+ f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+ __func__, dir->i_ino, dn.data_blkaddr);
+ err = -EFSCORRUPTED;
goto out;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ccb02226dd2c..a33d7a849b2d 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -74,7 +74,7 @@ static int __written_first_block(struct f2fs_sb_info *sbi,
if (!__is_valid_data_blkaddr(addr))
return 1;
if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE))
- return -EFAULT;
+ return -EFSCORRUPTED;
return 0;
}
@@ -176,9 +176,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
calculated = f2fs_inode_chksum(sbi, page);
if (provided != calculated)
- f2fs_msg(sbi->sb, KERN_WARNING,
- "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
- page->index, ino_of_node(page), provided, calculated);
+ f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
+ page->index, ino_of_node(page), provided, calculated);
return provided == calculated;
}
@@ -202,50 +201,41 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
if (!iblocks) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
- "run fsck to fix.",
- __func__, inode->i_ino, iblocks);
+ f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
return false;
}
if (ino_of_node(node_page) != nid_of_node(node_page)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode footer i_ino=%lx, ino,nid: "
- "[%u, %u] run fsck to fix.",
- __func__, inode->i_ino,
- ino_of_node(node_page), nid_of_node(node_page));
+ f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
return false;
}
if (f2fs_sb_has_flexible_inline_xattr(sbi)
&& !f2fs_has_extra_attr(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode ino=%lx, run fsck to fix.",
- __func__, inode->i_ino);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.",
+ __func__, inode->i_ino);
return false;
}
if (f2fs_has_extra_attr(inode) &&
!f2fs_sb_has_extra_attr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) is with extra_attr, "
- "but extra_attr feature is off",
- __func__, inode->i_ino);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
+ __func__, inode->i_ino);
return false;
}
if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
fi->i_extra_isize % sizeof(__le32)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
- "max: %zu",
- __func__, inode->i_ino, fi->i_extra_isize,
- F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
return false;
}
@@ -255,11 +245,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
(!fi->i_inline_xattr_size ||
fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) has corrupted "
- "i_inline_xattr_size: %d, max: %zu",
- __func__, inode->i_ino, fi->i_inline_xattr_size,
- MAX_INLINE_XATTR_SIZE);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_inline_xattr_size,
+ MAX_INLINE_XATTR_SIZE);
return false;
}
@@ -272,11 +260,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
DATA_GENERIC_ENHANCE))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) extent info [%u, %u, %u] "
- "is incorrect, run fsck to fix",
- __func__, inode->i_ino,
- ei->blk, ei->fofs, ei->len);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
return false;
}
}
@@ -284,19 +270,15 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
if (f2fs_has_inline_data(inode) &&
(!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx, mode=%u) should not have "
- "inline_data, run fsck to fix",
- __func__, inode->i_ino, inode->i_mode);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
return false;
}
if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx, mode=%u) should not have "
- "inline_dentry, run fsck to fix",
- __func__, inode->i_ino, inode->i_mode);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
return false;
}
@@ -343,6 +325,8 @@ static int do_read_inode(struct inode *inode)
le16_to_cpu(ri->i_gc_failures);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
fi->i_flags = le32_to_cpu(ri->i_flags);
+ if (S_ISREG(inode->i_mode))
+ fi->i_flags &= ~F2FS_PROJINHERIT_FL;
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
@@ -374,7 +358,7 @@ static int do_read_inode(struct inode *inode)
if (!sanity_check_inode(inode, node_page)) {
f2fs_put_page(node_page, 1);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
/* check data exist */
@@ -783,8 +767,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "May loss orphan inode, run fsck to fix.");
+ f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
goto out;
}
@@ -792,8 +775,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Too many orphan inodes, run fsck to fix.");
+ f2fs_warn(sbi, "Too many orphan inodes, run fsck to fix.");
} else {
f2fs_add_orphan_inode(inode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 0f77f9242751..c5b99042e6f2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -385,9 +385,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
int err = 0;
if (f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "skip recovering inline_dots inode (ino:%lu, pino:%u) "
- "in readonly mountpoint", dir->i_ino, pino);
+ f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint",
+ dir->i_ino, pino);
return 0;
}
@@ -484,9 +483,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_ENCRYPTED(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
- f2fs_msg(inode->i_sb, KERN_WARNING,
- "Inconsistent encryption contexts: %lu/%lu",
- dir->i_ino, inode->i_ino);
+ f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu",
+ dir->i_ino, inode->i_ino);
err = -EPERM;
goto out_iput;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 18a038a2a9fa..a18b2a895771 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -34,10 +34,9 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: out-of-range nid=%x, run fsck to fix.",
- __func__, nid);
- return -EINVAL;
+ f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
+ __func__, nid);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -1189,10 +1188,8 @@ int f2fs_remove_inode_page(struct inode *inode)
}
if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
- f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
- "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
- inode->i_ino,
- (unsigned long long)inode->i_blocks);
+ f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino, (unsigned long long)inode->i_blocks);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
}
@@ -1291,7 +1288,7 @@ static int read_node_page(struct page *page, int op_flags)
if (PageUptodate(page)) {
if (!f2fs_inode_chksum_verify(sbi, page)) {
ClearPageUptodate(page);
- return -EBADMSG;
+ return -EFSBADCRC;
}
return LOCKED_PAGE;
}
@@ -1375,16 +1372,15 @@ repeat:
}
if (!f2fs_inode_chksum_verify(sbi, page)) {
- err = -EBADMSG;
+ err = -EFSBADCRC;
goto out_err;
}
page_hit:
if(unlikely(nid != nid_of_node(page))) {
- f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
- "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
- nid, nid_of_node(page), ino_of_node(page),
- ofs_of_node(page), cpver_of_node(page),
- next_blkaddr_of_node(page));
+ f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+ nid, nid_of_node(page), ino_of_node(page),
+ ofs_of_node(page), cpver_of_node(page),
+ next_blkaddr_of_node(page));
err = -EINVAL;
out_err:
ClearPageUptodate(page);
@@ -1752,9 +1748,8 @@ continue_unlock:
break;
}
if (!ret && atomic && !marked) {
- f2fs_msg(sbi->sb, KERN_DEBUG,
- "Retry to write fsync mark: ino=%u, idx=%lx",
- ino, last_page->index);
+ f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx",
+ ino, last_page->index);
lock_page(last_page);
f2fs_wait_on_page_writeback(last_page, NODE, true, true);
set_page_dirty(last_page);
@@ -2304,8 +2299,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
if (ret) {
up_read(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, !mount);
- f2fs_msg(sbi->sb, KERN_ERR,
- "NAT is corrupt, run fsck to fix it");
+ f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
return ret;
}
}
@@ -2725,7 +2719,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
i = 1;
}
for (; i < NAT_ENTRY_PER_BLOCK; i++) {
- if (nat_blk->entries[i].block_addr != NULL_ADDR)
+ if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
valid++;
}
if (valid == 0) {
@@ -2915,7 +2909,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
nm_i->full_nat_bits = nm_i->nat_bits + 8;
nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
- f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+ f2fs_notice(sbi, "Found nat_bits in checkpoint");
return 0;
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index e04f82b3f4fc..783773e4560d 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -188,10 +188,9 @@ out:
name = "<encrypted>";
else
name = raw_inode->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "%s: ino = %x, name = %s, dir = %lx, err = %d",
- __func__, ino_of_node(ipage), name,
- IS_ERR(dir) ? 0 : dir->i_ino, err);
+ f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
+ __func__, ino_of_node(ipage), name,
+ IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
@@ -292,9 +291,8 @@ static int recover_inode(struct inode *inode, struct page *page)
else
name = F2FS_INODE(page)->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "recover_inode: ino = %x, name = %s, inline = %x",
- ino_of_node(page), name, raw->i_inline);
+ f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
+ ino_of_node(page), name, raw->i_inline);
return 0;
}
@@ -371,10 +369,9 @@ next:
/* sanity check in order to detect looped node chain */
if (++loop_cnt >= free_blocks ||
blkaddr == next_blkaddr_of_node(page)) {
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "%s: detect looped node chain, "
- "blkaddr:%u, next:%u",
- __func__, blkaddr, next_blkaddr_of_node(page));
+ f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u",
+ __func__, blkaddr,
+ next_blkaddr_of_node(page));
f2fs_put_page(page, 1);
err = -EINVAL;
break;
@@ -553,11 +550,10 @@ retry_dn:
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
- inode->i_ino, ofs_of_node(dn.node_page),
- ofs_of_node(page));
- err = -EFAULT;
+ f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+ inode->i_ino, ofs_of_node(dn.node_page),
+ ofs_of_node(page));
+ err = -EFSCORRUPTED;
goto err;
}
@@ -569,13 +565,13 @@ retry_dn:
if (__is_valid_data_blkaddr(src) &&
!f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto err;
}
if (__is_valid_data_blkaddr(dest) &&
!f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto err;
}
@@ -642,11 +638,9 @@ retry_prev:
err:
f2fs_put_dnode(&dn);
out:
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
- inode->i_ino,
- file_keep_isize(inode) ? "keep" : "recover",
- recovered, err);
+ f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
+ inode->i_ino, file_keep_isize(inode) ? "keep" : "recover",
+ recovered, err);
return err;
}
@@ -734,8 +728,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "recover fsync data on readonly fs");
+ f2fs_info(sbi, "recover fsync data on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8dee063c833f..a661ac32e829 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -546,9 +546,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
+ mutex_lock(&sbi->flush_lock);
+
blk_start_plug(&plug);
f2fs_sync_dirty_inodes(sbi, FILE_INODE);
blk_finish_plug(&plug);
+
+ mutex_unlock(&sbi->flush_lock);
}
f2fs_sync_fs(sbi->sb, true);
stat_inc_bg_cp_count(sbi->stat_info);
@@ -869,11 +873,14 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
mutex_unlock(&dirty_i->seglist_lock);
}
-int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
block_t holes[2] = {0, 0}; /* DATA and NODE */
+ block_t unusable;
struct seg_entry *se;
unsigned int segno;
@@ -887,10 +894,20 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
}
mutex_unlock(&dirty_i->seglist_lock);
- if (holes[DATA] > ovp || holes[NODE] > ovp)
+ unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
+ if (unusable > ovp_holes)
+ return unusable - ovp_holes;
+ return 0;
+}
+
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
+{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ if (unusable > F2FS_OPTION(sbi).unusable_cap)
return -EAGAIN;
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
- dirty_segments(sbi) > overprovision_segments(sbi))
+ dirty_segments(sbi) > ovp_hole_segs)
return -EAGAIN;
return 0;
}
@@ -1480,6 +1497,10 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
!is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
@@ -1740,8 +1761,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
devi = f2fs_target_device_index(sbi, blkstart);
if (blkstart < FDEV(devi).start_blk ||
blkstart > FDEV(devi).end_blk) {
- f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
- blkstart);
+ f2fs_err(sbi, "Invalid block %x", blkstart);
return -EIO;
}
blkstart -= FDEV(devi).start_blk;
@@ -1754,10 +1774,9 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
if (sector & (bdev_zone_sectors(bdev) - 1) ||
nr_sects != bdev_zone_sectors(bdev)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
- devi, sbi->s_ndevs ? FDEV(devi).path: "",
- blkstart, blklen);
+ f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
+ devi, sbi->s_ndevs ? FDEV(devi).path : "",
+ blkstart, blklen);
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
@@ -2121,15 +2140,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
mir_exist = f2fs_test_and_set_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when setting bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly set, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks--;
del = 0;
@@ -2150,15 +2168,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
mir_exist = f2fs_test_and_clear_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when clearing bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(!exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly cleared, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks++;
del = 0;
@@ -2640,6 +2657,39 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int segno;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ segno = CURSEG_I(sbi, type)->segno;
+ if (segno < start || segno > end)
+ goto unlock;
+
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
+ change_curseg(sbi, type);
+ else
+ new_curseg(sbi, type, true);
+
+ stat_inc_seg_type(sbi, curseg);
+
+ locate_dirty_segment(sbi, segno);
+unlock:
+ up_write(&SIT_I(sbi)->sentry_lock);
+
+ if (segno != curseg->segno)
+ f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
+ type, segno, curseg->segno);
+
+ mutex_unlock(&curseg->curseg_mutex);
+ up_read(&SM_I(sbi)->curseg_lock);
+}
+
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
@@ -2772,9 +2822,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Found FS corruption, run fsck to fix.");
- return -EIO;
+ f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
+ return -EFSCORRUPTED;
}
/* start/end segment number in main_area */
@@ -3197,12 +3246,17 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EFAULT;
+ f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
+ __func__, segno);
+ return -EFSCORRUPTED;
}
stat_inc_inplace_blocks(fio->sbi);
- err = f2fs_submit_page_bio(fio);
+ if (fio->bio)
+ err = f2fs_merge_page_bio(fio);
+ else
+ err = f2fs_submit_page_bio(fio);
if (!err) {
update_device_state(fio);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
@@ -3393,6 +3447,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
seg_i = CURSEG_I(sbi, i);
segno = le32_to_cpu(ckpt->cur_data_segno[i]);
blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
+ if (blk_off > ENTRIES_IN_SUM) {
+ f2fs_bug_on(sbi, 1);
+ f2fs_put_page(page, 1);
+ return -EFAULT;
+ }
seg_i->next_segno = segno;
reset_curseg(sbi, i, 0);
seg_i->alloc_type = ckpt->alloc_type[i];
@@ -3530,8 +3589,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
/* sanity check for summary blocks */
if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
- sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
+ sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
+ f2fs_err(sbi, "invalid journal entries nats %u sits %u\n",
+ nats_in_cursum(nat_j), sits_in_cursum(sit_j));
return -EINVAL;
+ }
return 0;
}
@@ -3762,7 +3824,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_journal *journal = curseg->journal;
struct sit_entry_set *ses, *tmp;
struct list_head *head = &SM_I(sbi)->sit_entry_set;
- bool to_journal = true;
+ bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
struct seg_entry *se;
down_write(&sit_i->sentry_lock);
@@ -3781,7 +3843,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* entries, remove all entries from journal and add and account
* them in sit entry set.
*/
- if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
+ if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
+ !to_journal)
remove_sits_in_journal(sbi);
/*
@@ -4096,11 +4159,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
start = le32_to_cpu(segno_in_journal(journal, i));
if (start >= MAIN_SEGS(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong journal entry on segno %u",
- start);
+ f2fs_err(sbi, "Wrong journal entry on segno %u",
+ start);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ err = -EFSCORRUPTED;
break;
}
@@ -4137,11 +4199,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
up_read(&curseg->journal_rwsem);
if (!err && total_node_blocks != valid_node_count(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "SIT is corrupted node# %u vs %u",
- total_node_blocks, valid_node_count(sbi));
+ f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
+ total_node_blocks, valid_node_count(sbi));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ err = -EFSCORRUPTED;
}
return err;
@@ -4232,6 +4293,39 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
return init_victim_secmap(sbi);
}
+static int sanity_check_curseg(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ /*
+ * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
+ * In LFS curseg, all blkaddr after .next_blkoff should be unused.
+ */
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, i);
+ struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
+ unsigned int blkofs = curseg->next_blkoff;
+
+ if (f2fs_test_bit(blkofs, se->cur_valid_map))
+ goto out;
+
+ if (curseg->alloc_type == SSR)
+ continue;
+
+ for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
+ if (!f2fs_test_bit(blkofs, se->cur_valid_map))
+ continue;
+out:
+ f2fs_err(sbi,
+ "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
+ i, curseg->segno, curseg->alloc_type,
+ curseg->next_blkoff, blkofs);
+ return -EFSCORRUPTED;
+ }
+ }
+ return 0;
+}
+
/*
* Update min, max modified time for cost-benefit GC algorithm
*/
@@ -4327,6 +4421,10 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
+ err = sanity_check_curseg(sbi);
+ if (err)
+ return err;
+
init_min_max_mtime(sbi);
return 0;
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 429007b8036e..b74602813a05 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -109,7 +109,7 @@
#define START_SEGNO(segno) \
(SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi) \
- ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
+ DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK)
#define f2fs_bitmap_size(nr) \
(BITS_TO_LONGS(nr) * sizeof(unsigned long))
@@ -693,21 +693,19 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
} while (cur_pos < sbi->blocks_per_seg);
if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Mismatch valid blocks %d vs. %d",
- GET_SIT_VBLOCKS(raw_sit), valid_blocks);
+ f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
+ GET_SIT_VBLOCKS(raw_sit), valid_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong valid blocks %d or segno %u",
- GET_SIT_VBLOCKS(raw_sit), segno);
+ f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
+ GET_SIT_VBLOCKS(raw_sit), segno);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
return 0;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4e91ba6c8a2e..6de6cda44031 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -136,7 +136,10 @@ enum {
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
- Opt_checkpoint,
+ Opt_checkpoint_disable,
+ Opt_checkpoint_disable_cap,
+ Opt_checkpoint_disable_cap_perc,
+ Opt_checkpoint_enable,
Opt_err,
};
@@ -195,45 +198,52 @@ static match_table_t f2fs_tokens = {
{Opt_alloc, "alloc_mode=%s"},
{Opt_fsync, "fsync_mode=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
- {Opt_checkpoint, "checkpoint=%s"},
+ {Opt_checkpoint_disable, "checkpoint=disable"},
+ {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
+ {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
+ {Opt_checkpoint_enable, "checkpoint=enable"},
{Opt_err, NULL},
};
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
+ int level;
va_start(args, fmt);
- vaf.fmt = fmt;
+
+ level = printk_get_level(fmt);
+ vaf.fmt = printk_skip_level(fmt);
vaf.va = &args;
- printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
+ printk("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+
va_end(args);
}
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
- block_t limit = (sbi->user_block_count << 1) / 1000;
+ block_t limit = min((sbi->user_block_count << 1) / 1000,
+ sbi->user_block_count - sbi->reserved_blocks);
/* limit is 0.2% */
if (test_opt(sbi, RESERVE_ROOT) &&
F2FS_OPTION(sbi).root_reserved_blocks > limit) {
F2FS_OPTION(sbi).root_reserved_blocks = limit;
- f2fs_msg(sbi->sb, KERN_INFO,
- "Reduce reserved blocks for root = %u",
- F2FS_OPTION(sbi).root_reserved_blocks);
+ f2fs_info(sbi, "Reduce reserved blocks for root = %u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
}
if (!test_opt(sbi, RESERVE_ROOT) &&
(!uid_eq(F2FS_OPTION(sbi).s_resuid,
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
!gid_eq(F2FS_OPTION(sbi).s_resgid,
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
- f2fs_msg(sbi->sb, KERN_INFO,
- "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
- from_kuid_munged(&init_user_ns,
- F2FS_OPTION(sbi).s_resuid),
- from_kgid_munged(&init_user_ns,
- F2FS_OPTION(sbi).s_resgid));
+ f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
+ from_kuid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resuid),
+ from_kgid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resgid));
}
static void init_once(void *foo)
@@ -254,35 +264,29 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
int ret = -EINVAL;
if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_msg(sb, KERN_ERR,
- "Cannot change journaled "
- "quota options when quota turned on");
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
if (f2fs_sb_has_quota_ino(sbi)) {
- f2fs_msg(sb, KERN_INFO,
- "QUOTA feature is enabled, so ignore qf_name");
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
return 0;
}
qname = match_strdup(args);
if (!qname) {
- f2fs_msg(sb, KERN_ERR,
- "Not enough memory for storing quotafile name");
+ f2fs_err(sbi, "Not enough memory for storing quotafile name");
return -ENOMEM;
}
if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
ret = 0;
else
- f2fs_msg(sb, KERN_ERR,
- "%s quota file already specified",
+ f2fs_err(sbi, "%s quota file already specified",
QTYPE2NAME(qtype));
goto errout;
}
if (strchr(qname, '/')) {
- f2fs_msg(sb, KERN_ERR,
- "quotafile must be on filesystem root");
+ f2fs_err(sbi, "quotafile must be on filesystem root");
goto errout;
}
F2FS_OPTION(sbi).s_qf_names[qtype] = qname;
@@ -298,8 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options"
- " when quota turned on");
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
@@ -315,8 +318,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
* to support legacy quotas in quota files.
*/
if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. "
- "Cannot enable project quota enforcement.");
+ f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
return -1;
}
if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
@@ -336,21 +338,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) ||
test_opt(sbi, PRJQUOTA)) {
- f2fs_msg(sbi->sb, KERN_ERR, "old and new quota "
- "format mixing");
+ f2fs_err(sbi, "old and new quota format mixing");
return -1;
}
if (!F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format "
- "not specified");
+ f2fs_err(sbi, "journaled quota format not specified");
return -1;
}
}
if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "QUOTA feature is enabled, so ignore jquota_fmt");
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
return 0;
@@ -418,8 +417,7 @@ static int parse_options(struct super_block *sb, char *options)
break;
case Opt_nodiscard:
if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_msg(sb, KERN_WARNING,
- "discard is required for zoned block devices");
+ f2fs_warn(sbi, "discard is required for zoned block devices");
return -EINVAL;
}
clear_opt(sbi, DISCARD);
@@ -451,20 +449,16 @@ static int parse_options(struct super_block *sb, char *options)
break;
#else
case Opt_user_xattr:
- f2fs_msg(sb, KERN_INFO,
- "user_xattr options not supported");
+ f2fs_info(sbi, "user_xattr options not supported");
break;
case Opt_nouser_xattr:
- f2fs_msg(sb, KERN_INFO,
- "nouser_xattr options not supported");
+ f2fs_info(sbi, "nouser_xattr options not supported");
break;
case Opt_inline_xattr:
- f2fs_msg(sb, KERN_INFO,
- "inline_xattr options not supported");
+ f2fs_info(sbi, "inline_xattr options not supported");
break;
case Opt_noinline_xattr:
- f2fs_msg(sb, KERN_INFO,
- "noinline_xattr options not supported");
+ f2fs_info(sbi, "noinline_xattr options not supported");
break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -476,10 +470,10 @@ static int parse_options(struct super_block *sb, char *options)
break;
#else
case Opt_acl:
- f2fs_msg(sb, KERN_INFO, "acl options not supported");
+ f2fs_info(sbi, "acl options not supported");
break;
case Opt_noacl:
- f2fs_msg(sb, KERN_INFO, "noacl options not supported");
+ f2fs_info(sbi, "noacl options not supported");
break;
#endif
case Opt_active_logs:
@@ -529,9 +523,8 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (test_opt(sbi, RESERVE_ROOT)) {
- f2fs_msg(sb, KERN_INFO,
- "Preserve previous reserve_root=%u",
- F2FS_OPTION(sbi).root_reserved_blocks);
+ f2fs_info(sbi, "Preserve previous reserve_root=%u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
} else {
F2FS_OPTION(sbi).root_reserved_blocks = arg;
set_opt(sbi, RESERVE_ROOT);
@@ -542,8 +535,7 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
uid = make_kuid(current_user_ns(), arg);
if (!uid_valid(uid)) {
- f2fs_msg(sb, KERN_ERR,
- "Invalid uid value %d", arg);
+ f2fs_err(sbi, "Invalid uid value %d", arg);
return -EINVAL;
}
F2FS_OPTION(sbi).s_resuid = uid;
@@ -553,8 +545,7 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
gid = make_kgid(current_user_ns(), arg);
if (!gid_valid(gid)) {
- f2fs_msg(sb, KERN_ERR,
- "Invalid gid value %d", arg);
+ f2fs_err(sbi, "Invalid gid value %d", arg);
return -EINVAL;
}
F2FS_OPTION(sbi).s_resgid = gid;
@@ -567,9 +558,7 @@ static int parse_options(struct super_block *sb, char *options)
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_msg(sb, KERN_WARNING,
- "adaptive mode is not allowed with "
- "zoned block device feature");
+ f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
kvfree(name);
return -EINVAL;
}
@@ -587,9 +576,8 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
- f2fs_msg(sb, KERN_WARNING,
- "Not support %d, larger than %d",
- 1 << arg, BIO_MAX_PAGES);
+ f2fs_warn(sbi, "Not support %d, larger than %d",
+ 1 << arg, BIO_MAX_PAGES);
return -EINVAL;
}
F2FS_OPTION(sbi).write_io_size_bits = arg;
@@ -610,13 +598,11 @@ static int parse_options(struct super_block *sb, char *options)
break;
#else
case Opt_fault_injection:
- f2fs_msg(sb, KERN_INFO,
- "fault_injection options not supported");
+ f2fs_info(sbi, "fault_injection options not supported");
break;
case Opt_fault_type:
- f2fs_msg(sb, KERN_INFO,
- "fault_type options not supported");
+ f2fs_info(sbi, "fault_type options not supported");
break;
#endif
case Opt_lazytime:
@@ -696,8 +682,7 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_jqfmt_vfsv0:
case Opt_jqfmt_vfsv1:
case Opt_noquota:
- f2fs_msg(sb, KERN_INFO,
- "quota operations not supported");
+ f2fs_info(sbi, "quota operations not supported");
break;
#endif
case Opt_whint:
@@ -759,39 +744,44 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_test_dummy_encryption:
#ifdef CONFIG_FS_ENCRYPTION
if (!f2fs_sb_has_encrypt(sbi)) {
- f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
+ f2fs_err(sbi, "Encrypt feature is off");
return -EINVAL;
}
F2FS_OPTION(sbi).test_dummy_encryption = true;
- f2fs_msg(sb, KERN_INFO,
- "Test dummy encryption mode enabled");
+ f2fs_info(sbi, "Test dummy encryption mode enabled");
#else
- f2fs_msg(sb, KERN_INFO,
- "Test dummy encryption mount option ignored");
+ f2fs_info(sbi, "Test dummy encryption mount option ignored");
#endif
break;
- case Opt_checkpoint:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
-
- if (strlen(name) == 6 &&
- !strncmp(name, "enable", 6)) {
- clear_opt(sbi, DISABLE_CHECKPOINT);
- } else if (strlen(name) == 7 &&
- !strncmp(name, "disable", 7)) {
- set_opt(sbi, DISABLE_CHECKPOINT);
- } else {
- kvfree(name);
+ case Opt_checkpoint_disable_cap_perc:
+ if (args->from && match_int(args, &arg))
return -EINVAL;
- }
- kvfree(name);
+ if (arg < 0 || arg > 100)
+ return -EINVAL;
+ if (arg == 100)
+ F2FS_OPTION(sbi).unusable_cap =
+ sbi->user_block_count;
+ else
+ F2FS_OPTION(sbi).unusable_cap =
+ (sbi->user_block_count / 100) * arg;
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_disable_cap:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ F2FS_OPTION(sbi).unusable_cap = arg;
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_disable:
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_enable:
+ clear_opt(sbi, DISABLE_CHECKPOINT);
break;
default:
- f2fs_msg(sb, KERN_ERR,
- "Unrecognized mount option \"%s\" or missing value",
- p);
+ f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
+ p);
return -EINVAL;
}
}
@@ -800,23 +790,18 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
#else
if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
+ f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
return -EINVAL;
}
if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
- f2fs_msg(sb, KERN_ERR,
- "Filesystem with project quota feature cannot be "
- "mounted RDWR without CONFIG_QUOTA");
+ f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
return -EINVAL;
}
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
- f2fs_msg(sb, KERN_ERR,
- "Should set mode=lfs with %uKB-sized IO",
- F2FS_IO_SIZE_KB(sbi));
+ f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
+ F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
}
@@ -825,15 +810,11 @@ static int parse_options(struct super_block *sb, char *options)
if (!f2fs_sb_has_extra_attr(sbi) ||
!f2fs_sb_has_flexible_inline_xattr(sbi)) {
- f2fs_msg(sb, KERN_ERR,
- "extra_attr or flexible_inline_xattr "
- "feature is off");
+ f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off");
return -EINVAL;
}
if (!test_opt(sbi, INLINE_XATTR)) {
- f2fs_msg(sb, KERN_ERR,
- "inline_xattr_size option should be "
- "set with inline_xattr option");
+ f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option");
return -EINVAL;
}
@@ -842,16 +823,14 @@ static int parse_options(struct super_block *sb, char *options)
if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
F2FS_OPTION(sbi).inline_xattr_size > max_size) {
- f2fs_msg(sb, KERN_ERR,
- "inline xattr size is out of range: %d ~ %d",
- min_size, max_size);
+ f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d",
+ min_size, max_size);
return -EINVAL;
}
}
if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
- f2fs_msg(sb, KERN_ERR,
- "LFS not compatible with checkpoint=disable\n");
+ f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n");
return -EINVAL;
}
@@ -1313,6 +1292,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",disable_roll_forward");
if (test_opt(sbi, DISCARD))
seq_puts(seq, ",discard");
+ else
+ seq_puts(seq, ",nodiscard");
if (test_opt(sbi, NOHEAP))
seq_puts(seq, ",no_heap");
else
@@ -1409,8 +1390,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",alloc_mode=%s", "reuse");
if (test_opt(sbi, DISABLE_CHECKPOINT))
- seq_puts(seq, ",checkpoint=disable");
-
+ seq_printf(seq, ",checkpoint=disable:%u",
+ F2FS_OPTION(sbi).unusable_cap);
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1439,6 +1420,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, EXTENT_CACHE);
set_opt(sbi, NOHEAP);
clear_opt(sbi, DISABLE_CHECKPOINT);
+ F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
set_opt(sbi, DISCARD);
@@ -1467,10 +1449,10 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
struct cp_control cpc;
int err = 0;
int ret;
+ block_t unusable;
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "checkpoint=disable on readonly fs");
+ f2fs_err(sbi, "checkpoint=disable on readonly fs");
return -EINVAL;
}
sbi->sb->s_flags |= SB_ACTIVE;
@@ -1494,7 +1476,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
goto restore_flag;
}
- if (f2fs_disable_cp_again(sbi)) {
+ unusable = f2fs_get_unusable_blocks(sbi);
+ if (f2fs_disable_cp_again(sbi, unusable)) {
err = -EAGAIN;
goto restore_flag;
}
@@ -1507,7 +1490,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
goto out_unlock;
spin_lock(&sbi->stat_lock);
- sbi->unusable_block_count = 0;
+ sbi->unusable_block_count = unusable;
spin_unlock(&sbi->stat_lock);
out_unlock:
@@ -1572,8 +1555,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
/* recover superblocks we couldn't write due to previous RO mount */
if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
err = f2fs_commit_super(sbi, false);
- f2fs_msg(sb, KERN_INFO,
- "Try to recover all the superblocks, ret: %d", err);
+ f2fs_info(sbi, "Try to recover all the superblocks, ret: %d",
+ err);
if (!err)
clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
}
@@ -1614,15 +1597,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
- f2fs_msg(sbi->sb, KERN_WARNING,
- "switch extent_cache option is not allowed");
+ f2fs_warn(sbi, "switch extent_cache option is not allowed");
goto restore_opts;
}
if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
- f2fs_msg(sbi->sb, KERN_WARNING,
- "disabling checkpoint not compatible with read-only");
+ f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
goto restore_opts;
}
@@ -1692,8 +1673,7 @@ skip:
restore_gc:
if (need_restart_gc) {
if (f2fs_start_gc_thread(sbi))
- f2fs_msg(sbi->sb, KERN_WARNING,
- "background gc thread has stopped");
+ f2fs_warn(sbi, "background gc thread has stopped");
} else if (need_stop_gc) {
f2fs_stop_gc_thread(sbi);
}
@@ -1832,8 +1812,7 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
{
if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "quota sysfile may be corrupted, skip loading it");
+ f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it");
return 0;
}
@@ -1849,8 +1828,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
if (f2fs_sb_has_quota_ino(sbi) && rdonly) {
err = f2fs_enable_quotas(sbi->sb);
if (err) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on quota_ino: %d", err);
+ f2fs_err(sbi, "Cannot turn on quota_ino: %d", err);
return 0;
}
return 1;
@@ -1863,8 +1841,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
enabled = 1;
continue;
}
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on quotas: %d on %d", err, i);
+ f2fs_err(sbi, "Cannot turn on quotas: %d on %d",
+ err, i);
}
}
return enabled;
@@ -1885,8 +1863,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
qf_inode = f2fs_iget(sb, qf_inum);
if (IS_ERR(qf_inode)) {
- f2fs_msg(sb, KERN_ERR,
- "Bad quota inode %u:%lu", type, qf_inum);
+ f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum);
return PTR_ERR(qf_inode);
}
@@ -1899,17 +1876,17 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
static int f2fs_enable_quotas(struct super_block *sb)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
int type, err = 0;
unsigned long qf_inum;
bool quota_mopt[MAXQUOTAS] = {
- test_opt(F2FS_SB(sb), USRQUOTA),
- test_opt(F2FS_SB(sb), GRPQUOTA),
- test_opt(F2FS_SB(sb), PRJQUOTA),
+ test_opt(sbi, USRQUOTA),
+ test_opt(sbi, GRPQUOTA),
+ test_opt(sbi, PRJQUOTA),
};
if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
- f2fs_msg(sb, KERN_ERR,
- "quota file may be corrupted, skip loading it");
+ f2fs_err(sbi, "quota file may be corrupted, skip loading it");
return 0;
}
@@ -1922,10 +1899,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
DQUOT_USAGE_ENABLED |
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "fsck to fix.", type, err);
+ f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.",
+ type, err);
for (type--; type >= 0; type--)
dquot_quota_off(sb, type);
set_sbi_flag(F2FS_SB(sb),
@@ -1944,6 +1919,18 @@ int f2fs_quota_sync(struct super_block *sb, int type)
int cnt;
int ret;
+ /*
+ * do_quotactl
+ * f2fs_quota_sync
+ * down_read(quota_sem)
+ * dquot_writeback_dquots()
+ * f2fs_dquot_commit
+ * block_operation
+ * down_read(quota_sem)
+ */
+ f2fs_lock_op(sbi);
+
+ down_read(&sbi->quota_sem);
ret = dquot_writeback_dquots(sb, type);
if (ret)
goto out;
@@ -1981,6 +1968,8 @@ int f2fs_quota_sync(struct super_block *sb, int type)
out:
if (ret)
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ f2fs_unlock_op(sbi);
return ret;
}
@@ -2045,10 +2034,8 @@ void f2fs_quota_off_umount(struct super_block *sb)
if (err) {
int ret = dquot_quota_off(sb, type);
- f2fs_msg(sb, KERN_ERR,
- "Fail to turn off disk quota "
- "(type: %d, err: %d, ret:%d), Please "
- "run fsck to fix it.", type, err, ret);
+ f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.",
+ type, err, ret);
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
}
}
@@ -2074,32 +2061,40 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
static int f2fs_dquot_commit(struct dquot *dquot)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_commit(dquot);
if (ret < 0)
- set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
return ret;
}
static int f2fs_dquot_acquire(struct dquot *dquot)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_acquire(dquot);
if (ret < 0)
- set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
-
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
return ret;
}
static int f2fs_dquot_release(struct dquot *dquot)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_release(dquot);
if (ret < 0)
- set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
return ret;
}
@@ -2109,22 +2104,27 @@ static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_mark_dquot_dirty(dquot);
/* if we are using journalled quota */
if (is_journalled_quota(sbi))
set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ up_read(&sbi->quota_sem);
return ret;
}
static int f2fs_dquot_commit_info(struct super_block *sb, int type)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_commit_info(sb, type);
if (ret < 0)
- set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
return ret;
}
@@ -2341,55 +2341,49 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
(segment_count << log_blocks_per_seg);
if (segment0_blkaddr != cp_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
- segment0_blkaddr, cp_blkaddr);
+ f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
+ segment0_blkaddr, cp_blkaddr);
return true;
}
if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
sit_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
- cp_blkaddr, sit_blkaddr,
- segment_count_ckpt << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
+ cp_blkaddr, sit_blkaddr,
+ segment_count_ckpt << log_blocks_per_seg);
return true;
}
if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
nat_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
- sit_blkaddr, nat_blkaddr,
- segment_count_sit << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
+ sit_blkaddr, nat_blkaddr,
+ segment_count_sit << log_blocks_per_seg);
return true;
}
if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
ssa_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
- nat_blkaddr, ssa_blkaddr,
- segment_count_nat << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
+ nat_blkaddr, ssa_blkaddr,
+ segment_count_nat << log_blocks_per_seg);
return true;
}
if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
main_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
- ssa_blkaddr, main_blkaddr,
- segment_count_ssa << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
+ ssa_blkaddr, main_blkaddr,
+ segment_count_ssa << log_blocks_per_seg);
return true;
}
if (main_end_blkaddr > seg_end_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
- main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
- segment_count_main << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
+ main_blkaddr,
+ segment0_blkaddr +
+ (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
return true;
} else if (main_end_blkaddr < seg_end_blkaddr) {
int err = 0;
@@ -2406,12 +2400,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
err = __f2fs_commit_super(bh, NULL);
res = err ? "failed" : "done";
}
- f2fs_msg(sb, KERN_INFO,
- "Fix alignment : %s, start(%u) end(%u) block(%u)",
- res, main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
- segment_count_main << log_blocks_per_seg);
+ f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)",
+ res, main_blkaddr,
+ segment0_blkaddr +
+ (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
if (err)
return true;
}
@@ -2425,7 +2418,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
- struct super_block *sb = sbi->sb;
unsigned int blocksize;
size_t crc_offset = 0;
__u32 crc = 0;
@@ -2435,48 +2427,42 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
crc_offset = le32_to_cpu(raw_super->checksum_offset);
if (crc_offset !=
offsetof(struct f2fs_super_block, crc)) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid SB checksum offset: %zu",
- crc_offset);
+ f2fs_info(sbi, "Invalid SB checksum offset: %zu",
+ crc_offset);
return 1;
}
crc = le32_to_cpu(raw_super->crc);
if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid SB checksum value: %u", crc);
+ f2fs_info(sbi, "Invalid SB checksum value: %u", crc);
return 1;
}
}
if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
- f2fs_msg(sb, KERN_INFO,
- "Magic Mismatch, valid(0x%x) - read(0x%x)",
- F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
+ f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)",
+ F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
return 1;
}
/* Currently, support only 4KB page cache size */
if (F2FS_BLKSIZE != PAGE_SIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid page_cache_size (%lu), supports only 4KB",
- PAGE_SIZE);
+ f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB",
+ PAGE_SIZE);
return 1;
}
/* Currently, support only 4KB block size */
blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
if (blocksize != F2FS_BLKSIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid blocksize (%u), supports only 4KB",
- blocksize);
+ f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB",
+ blocksize);
return 1;
}
/* check log blocks per segment */
if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid log blocks per segment (%u)",
- le32_to_cpu(raw_super->log_blocks_per_seg));
+ f2fs_info(sbi, "Invalid log blocks per segment (%u)",
+ le32_to_cpu(raw_super->log_blocks_per_seg));
return 1;
}
@@ -2485,17 +2471,16 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
F2FS_MAX_LOG_SECTOR_SIZE ||
le32_to_cpu(raw_super->log_sectorsize) <
F2FS_MIN_LOG_SECTOR_SIZE) {
- f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
- le32_to_cpu(raw_super->log_sectorsize));
+ f2fs_info(sbi, "Invalid log sectorsize (%u)",
+ le32_to_cpu(raw_super->log_sectorsize));
return 1;
}
if (le32_to_cpu(raw_super->log_sectors_per_block) +
le32_to_cpu(raw_super->log_sectorsize) !=
F2FS_MAX_LOG_SECTOR_SIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid log sectors per block(%u) log sectorsize(%u)",
- le32_to_cpu(raw_super->log_sectors_per_block),
- le32_to_cpu(raw_super->log_sectorsize));
+ f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)",
+ le32_to_cpu(raw_super->log_sectors_per_block),
+ le32_to_cpu(raw_super->log_sectorsize));
return 1;
}
@@ -2509,59 +2494,51 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
if (segment_count > F2FS_MAX_SEGMENT ||
segment_count < F2FS_MIN_SEGMENTS) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment count (%u)",
- segment_count);
+ f2fs_info(sbi, "Invalid segment count (%u)", segment_count);
return 1;
}
if (total_sections > segment_count ||
total_sections < F2FS_MIN_SEGMENTS ||
segs_per_sec > segment_count || !segs_per_sec) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment/section count (%u, %u x %u)",
- segment_count, total_sections, segs_per_sec);
+ f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
+ segment_count, total_sections, segs_per_sec);
return 1;
}
if ((segment_count / segs_per_sec) < total_sections) {
- f2fs_msg(sb, KERN_INFO,
- "Small segment_count (%u < %u * %u)",
- segment_count, segs_per_sec, total_sections);
+ f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
+ segment_count, segs_per_sec, total_sections);
return 1;
}
if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong segment_count / block_count (%u > %llu)",
- segment_count, le64_to_cpu(raw_super->block_count));
+ f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)",
+ segment_count, le64_to_cpu(raw_super->block_count));
return 1;
}
if (secs_per_zone > total_sections || !secs_per_zone) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong secs_per_zone / total_sections (%u, %u)",
- secs_per_zone, total_sections);
+ f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)",
+ secs_per_zone, total_sections);
return 1;
}
if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION ||
raw_super->hot_ext_count > F2FS_MAX_EXTENSION ||
(le32_to_cpu(raw_super->extension_count) +
raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) {
- f2fs_msg(sb, KERN_INFO,
- "Corrupted extension count (%u + %u > %u)",
- le32_to_cpu(raw_super->extension_count),
- raw_super->hot_ext_count,
- F2FS_MAX_EXTENSION);
+ f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)",
+ le32_to_cpu(raw_super->extension_count),
+ raw_super->hot_ext_count,
+ F2FS_MAX_EXTENSION);
return 1;
}
if (le32_to_cpu(raw_super->cp_payload) >
(blocks_per_seg - F2FS_CP_PACKS)) {
- f2fs_msg(sb, KERN_INFO,
- "Insane cp_payload (%u > %u)",
- le32_to_cpu(raw_super->cp_payload),
- blocks_per_seg - F2FS_CP_PACKS);
+ f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+ le32_to_cpu(raw_super->cp_payload),
+ blocks_per_seg - F2FS_CP_PACKS);
return 1;
}
@@ -2569,11 +2546,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
le32_to_cpu(raw_super->root_ino) != 3) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
- le32_to_cpu(raw_super->node_ino),
- le32_to_cpu(raw_super->meta_ino),
- le32_to_cpu(raw_super->root_ino));
+ f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
+ le32_to_cpu(raw_super->node_ino),
+ le32_to_cpu(raw_super->meta_ino),
+ le32_to_cpu(raw_super->root_ino));
return 1;
}
@@ -2617,8 +2593,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong layout: check mkfs.f2fs version");
+ f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version");
return 1;
}
@@ -2627,16 +2602,15 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (!user_block_count || user_block_count >=
segment_count_main << log_blocks_per_seg) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong user_block_count: %u", user_block_count);
+ f2fs_err(sbi, "Wrong user_block_count: %u",
+ user_block_count);
return 1;
}
valid_user_blocks = le64_to_cpu(ckpt->valid_block_count);
if (valid_user_blocks > user_block_count) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong valid_user_blocks: %u, user_block_count: %u",
- valid_user_blocks, user_block_count);
+ f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u",
+ valid_user_blocks, user_block_count);
return 1;
}
@@ -2644,9 +2618,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
avail_node_count = sbi->total_node_count - sbi->nquota_files -
F2FS_RESERVED_NODE_NUM;
if (valid_node_count > avail_node_count) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong valid_node_count: %u, avail_node_count: %u",
- valid_node_count, avail_node_count);
+ f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u",
+ valid_node_count, avail_node_count);
return 1;
}
@@ -2660,10 +2633,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
le32_to_cpu(ckpt->cur_node_segno[j])) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Node segment (%u, %u) has the same "
- "segno: %u", i, j,
- le32_to_cpu(ckpt->cur_node_segno[i]));
+ f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
return 1;
}
}
@@ -2675,10 +2647,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_data_segno[i]) ==
le32_to_cpu(ckpt->cur_data_segno[j])) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Data segment (%u, %u) has the same "
- "segno: %u", i, j,
- le32_to_cpu(ckpt->cur_data_segno[i]));
+ f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_data_segno[i]));
return 1;
}
}
@@ -2687,10 +2658,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = i; j < NR_CURSEG_DATA_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
le32_to_cpu(ckpt->cur_data_segno[j])) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Data segment (%u) and Data segment (%u)"
- " has the same segno: %u", i, j,
- le32_to_cpu(ckpt->cur_node_segno[i]));
+ f2fs_err(sbi, "Data segment (%u) and Data segment (%u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
return 1;
}
}
@@ -2701,9 +2671,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong bitmap size: sit: %u, nat:%u",
- sit_bitmap_size, nat_bitmap_size);
+ f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u",
+ sit_bitmap_size, nat_bitmap_size);
return 1;
}
@@ -2712,14 +2681,22 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (cp_pack_start_sum < cp_payload + 1 ||
cp_pack_start_sum > blocks_per_seg - 1 -
NR_CURSEG_TYPE) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong cp_pack_start_sum: %u",
- cp_pack_start_sum);
+ f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
+ if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) &&
+ le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) {
+ f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, "
+ "please run fsck v1.13.0 or higher to repair, chksum_offset: %u, "
+ "fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"",
+ le32_to_cpu(ckpt->checksum_offset));
return 1;
}
if (unlikely(f2fs_cp_error(sbi))) {
- f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
+ f2fs_err(sbi, "A bug case: need to run fsck");
return 1;
}
return 0;
@@ -2886,18 +2863,17 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
for (block = 0; block < 2; block++) {
bh = sb_bread(sb, block);
if (!bh) {
- f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
- block + 1);
+ f2fs_err(sbi, "Unable to read %dth superblock",
+ block + 1);
err = -EIO;
continue;
}
/* sanity checking of raw super */
if (sanity_check_raw_super(sbi, bh)) {
- f2fs_msg(sb, KERN_ERR,
- "Can't find valid F2FS filesystem in %dth superblock",
- block + 1);
- err = -EINVAL;
+ f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
+ block + 1);
+ err = -EFSCORRUPTED;
brelse(bh);
continue;
}
@@ -3026,36 +3002,32 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
!f2fs_sb_has_blkzoned(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Zoned block device feature not enabled\n");
+ f2fs_err(sbi, "Zoned block device feature not enabled\n");
return -EINVAL;
}
if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
if (init_blkz_info(sbi, i)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Failed to initialize F2FS blkzone information");
+ f2fs_err(sbi, "Failed to initialize F2FS blkzone information");
return -EINVAL;
}
if (max_devices == 1)
break;
- f2fs_msg(sbi->sb, KERN_INFO,
- "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
- i, FDEV(i).path,
- FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk,
- bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
- "Host-aware" : "Host-managed");
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk,
+ bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
+ "Host-aware" : "Host-managed");
continue;
}
#endif
- f2fs_msg(sbi->sb, KERN_INFO,
- "Mount Device [%2d]: %20s, %8u, %8x - %8x",
- i, FDEV(i).path,
- FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk);
- }
- f2fs_msg(sbi->sb, KERN_INFO,
- "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk);
+ }
+ f2fs_info(sbi,
+ "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
@@ -3101,7 +3073,7 @@ try_onemore:
/* Load the checksum driver */
sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(sbi->s_chksum_driver)) {
- f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
+ f2fs_err(sbi, "Cannot load crc32 driver.");
err = PTR_ERR(sbi->s_chksum_driver);
sbi->s_chksum_driver = NULL;
goto free_sbi;
@@ -3109,7 +3081,7 @@ try_onemore:
/* set a block size */
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
- f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
+ f2fs_err(sbi, "unable to set blocksize");
goto free_sbi;
}
@@ -3133,8 +3105,7 @@ try_onemore:
*/
#ifndef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_msg(sb, KERN_ERR,
- "Zoned block device support is not enabled");
+ f2fs_err(sbi, "Zoned block device support is not enabled");
err = -EOPNOTSUPP;
goto free_sb_buf;
}
@@ -3158,10 +3129,7 @@ try_onemore:
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
- if (f2fs_sb_has_quota_ino(sbi))
- sb->s_qcop = &dquot_quotactl_sysfile_ops;
- else
- sb->s_qcop = &f2fs_quotactl_ops;
+ sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
if (f2fs_sb_has_quota_ino(sbi)) {
@@ -3190,6 +3158,7 @@ try_onemore:
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
+ mutex_init(&sbi->resize_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -3225,6 +3194,7 @@ try_onemore:
}
init_rwsem(&sbi->cp_rwsem);
+ init_rwsem(&sbi->quota_sem);
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
@@ -3244,14 +3214,14 @@ try_onemore:
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
+ f2fs_err(sbi, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
goto free_io_dummy;
}
err = f2fs_get_valid_checkpoint(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
+ f2fs_err(sbi, "Failed to get valid F2FS checkpoint");
goto free_meta_inode;
}
@@ -3262,10 +3232,13 @@ try_onemore:
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
}
+ if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG))
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
/* Initialize device list */
err = f2fs_scan_devices(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR, "Failed to find devices");
+ f2fs_err(sbi, "Failed to find devices");
goto free_devices;
}
@@ -3285,6 +3258,7 @@ try_onemore:
INIT_LIST_HEAD(&sbi->inode_list[i]);
spin_lock_init(&sbi->inode_lock[i]);
}
+ mutex_init(&sbi->flush_lock);
f2fs_init_extent_cache_info(sbi);
@@ -3295,14 +3269,14 @@ try_onemore:
/* setup f2fs internal modules */
err = f2fs_build_segment_manager(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to initialize F2FS segment manager");
+ f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)",
+ err);
goto free_sm;
}
err = f2fs_build_node_manager(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to initialize F2FS node manager");
+ f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)",
+ err);
goto free_nm;
}
@@ -3327,7 +3301,7 @@ try_onemore:
/* get an inode for node space */
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
if (IS_ERR(sbi->node_inode)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
+ f2fs_err(sbi, "Failed to read node inode");
err = PTR_ERR(sbi->node_inode);
goto free_stats;
}
@@ -3335,7 +3309,7 @@ try_onemore:
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
if (IS_ERR(root)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
+ f2fs_err(sbi, "Failed to read root inode");
err = PTR_ERR(root);
goto free_node_inode;
}
@@ -3361,8 +3335,7 @@ try_onemore:
if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err)
- f2fs_msg(sb, KERN_ERR,
- "Cannot turn on quotas: error %d", err);
+ f2fs_err(sbi, "Cannot turn on quotas: error %d", err);
}
#endif
/* if there are nt orphan nodes free them */
@@ -3382,13 +3355,10 @@ try_onemore:
if (f2fs_hw_is_readonly(sbi)) {
if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
err = -EROFS;
- f2fs_msg(sb, KERN_ERR,
- "Need to recover fsync data, but "
- "write access unavailable");
+ f2fs_err(sbi, "Need to recover fsync data, but write access unavailable");
goto free_meta;
}
- f2fs_msg(sbi->sb, KERN_INFO, "write access "
- "unavailable, skipping recovery");
+ f2fs_info(sbi, "write access unavailable, skipping recovery");
goto reset_checkpoint;
}
@@ -3403,8 +3373,8 @@ try_onemore:
if (err != -ENOMEM)
skip_recovery = true;
need_fsck = true;
- f2fs_msg(sb, KERN_ERR,
- "Cannot recover all fsync data errno=%d", err);
+ f2fs_err(sbi, "Cannot recover all fsync data errno=%d",
+ err);
goto free_meta;
}
} else {
@@ -3412,8 +3382,7 @@ try_onemore:
if (!f2fs_readonly(sb) && err > 0) {
err = -EINVAL;
- f2fs_msg(sb, KERN_ERR,
- "Need to recover fsync data");
+ f2fs_err(sbi, "Need to recover fsync data");
goto free_meta;
}
}
@@ -3444,17 +3413,16 @@ reset_checkpoint:
/* recover broken superblock */
if (recovery) {
err = f2fs_commit_super(sbi, true);
- f2fs_msg(sb, KERN_INFO,
- "Try to recover %dth superblock, ret: %d",
- sbi->valid_super_block ? 1 : 2, err);
+ f2fs_info(sbi, "Try to recover %dth superblock, ret: %d",
+ sbi->valid_super_block ? 1 : 2, err);
}
f2fs_join_shrinker(sbi);
f2fs_tuning_parameters(sbi);
- f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
- cur_cp_version(F2FS_CKPT(sbi)));
+ f2fs_notice(sbi, "Mounted with checkpoint version = %llx",
+ cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 729f46a3c9ee..3aeacd0aacfd 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -68,6 +68,20 @@ static ssize_t dirty_segments_show(struct f2fs_attr *a,
(unsigned long long)(dirty_segments(sbi)));
}
+static ssize_t unusable_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ block_t unusable;
+
+ if (test_opt(sbi, DISABLE_CHECKPOINT))
+ unusable = sbi->unusable_block_count;
+ else
+ unusable = f2fs_get_unusable_blocks(sbi);
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)unusable);
+}
+
+
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -440,6 +454,7 @@ F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
+F2FS_GENERAL_RO_ATTR(unusable);
#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -495,12 +510,14 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(inject_type),
#endif
ATTR_LIST(dirty_segments),
+ ATTR_LIST(unusable),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
ATTR_LIST(current_reserved_blocks),
NULL,
};
+ATTRIBUTE_GROUPS(f2fs);
static struct attribute *f2fs_feat_attrs[] = {
#ifdef CONFIG_FS_ENCRYPTION
@@ -520,6 +537,7 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTR_LIST(sb_checksum),
NULL,
};
+ATTRIBUTE_GROUPS(f2fs_feat);
static const struct sysfs_ops f2fs_attr_ops = {
.show = f2fs_attr_show,
@@ -527,7 +545,7 @@ static const struct sysfs_ops f2fs_attr_ops = {
};
static struct kobj_type f2fs_sb_ktype = {
- .default_attrs = f2fs_attrs,
+ .default_groups = f2fs_groups,
.sysfs_ops = &f2fs_attr_ops,
.release = f2fs_sb_release,
};
@@ -541,7 +559,7 @@ static struct kset f2fs_kset = {
};
static struct kobj_type f2fs_feat_ktype = {
- .default_attrs = f2fs_feat_attrs,
+ .default_groups = f2fs_feat_groups,
.sysfs_ops = &f2fs_attr_ops,
};
@@ -566,8 +584,7 @@ static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u", se->type,
- get_valid_blocks(sbi, i, false));
+ seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks);
if ((i % 10) == 9 || i == (total_segs - 1))
seq_putc(seq, '\n');
else
@@ -593,8 +610,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
struct seg_entry *se = get_seg_entry(sbi, i);
seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u|", se->type,
- get_valid_blocks(sbi, i, false));
+ seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks);
for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
seq_printf(seq, " %.2x", se->cur_valid_map[j]);
seq_putc(seq, '\n');
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index e791741d193b..b32c45621679 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -346,7 +346,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
*xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
if (!*xe) {
- err = -EFAULT;
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ inode->i_ino);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ err = -EFSCORRUPTED;
goto out;
}
check:
@@ -622,7 +625,10 @@ static int __f2fs_setxattr(struct inode *inode, int index,
/* find entry with wanted name. */
here = __find_xattr(base_addr, last_base_addr, index, len, name);
if (!here) {
- error = -EFAULT;
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ inode->i_ino);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ error = -EFSCORRUPTED;
goto exit;
}
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 67b7bda5647a..72ebfe578f40 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -317,7 +317,7 @@ static void fscache_objlist_config(struct fscache_objlist_data *data)
const char *buf;
int len;
- key = request_key(&key_type_user, "fscache:objlist", NULL, NULL);
+ key = request_key(&key_type_user, "fscache:objlist", NULL);
if (IS_ERR(key))
goto no_config;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b8f9c83835d5..5ae2828beb00 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3112,9 +3112,9 @@ out:
return err;
}
-static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags)
+static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
{
struct fuse_file *ff_in = file_in->private_data;
struct fuse_file *ff_out = file_out->private_data;
@@ -3142,6 +3142,9 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (fc->no_copy_file_range)
return -EOPNOTSUPP;
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ return -EXDEV;
+
if (fc->writeback_cache) {
inode_lock(inode_in);
err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
@@ -3152,6 +3155,10 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
inode_lock(inode_out);
+ err = file_modified(file_out);
+ if (err)
+ goto out;
+
if (fc->writeback_cache) {
err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
if (err)
@@ -3190,10 +3197,26 @@ out:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
inode_unlock(inode_out);
+ file_accessed(file_in);
return err;
}
+static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len, flags);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len, flags);
+ return ret;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index abeac61cfed3..f42048cc5454 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -82,15 +82,11 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
}
/**
- * gfs2_writepage_common - Common bits of writepage
- * @page: The page to be written
+ * gfs2_writepage - Write page for writeback mappings
+ * @page: The page
* @wbc: The writeback control
- *
- * Returns: 1 if writepage is ok, otherwise an error code or zero if no error.
*/
-
-static int gfs2_writepage_common(struct page *page,
- struct writeback_control *wbc)
+static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
@@ -109,7 +105,9 @@ static int gfs2_writepage_common(struct page *page,
page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
goto out;
}
- return 1;
+
+ return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
+
redirty:
redirty_page_for_writepage(wbc, page);
out:
@@ -117,24 +115,6 @@ out:
return 0;
}
-/**
- * gfs2_writepage - Write page for writeback mappings
- * @page: The page
- * @wbc: The writeback control
- *
- */
-
-static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
-{
- int ret;
-
- ret = gfs2_writepage_common(page, wbc);
- if (ret <= 0)
- return ret;
-
- return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
-}
-
/* This is the same as calling block_write_full_page, but it also
* writes pages outside of i_size
*/
@@ -454,8 +434,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
*
* Returns: errno
*/
-
-int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
struct buffer_head *dibh;
u64 dsize = i_size_read(&ip->i_inode);
@@ -518,7 +497,7 @@ static int __gfs2_readpage(void *file, struct page *page)
error = mpage_readpage(page, gfs2_block_map);
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
return error;
@@ -635,7 +614,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
ret = -EIO;
return ret;
}
@@ -686,47 +665,6 @@ out:
}
/**
- * gfs2_stuffed_write_end - Write end for stuffed files
- * @inode: The inode
- * @dibh: The buffer_head containing the on-disk inode
- * @pos: The file position
- * @copied: How much was actually copied by the VFS
- * @page: The page
- *
- * This copies the data from the page into the inode block after
- * the inode data structure itself.
- *
- * Returns: copied bytes or errno
- */
-int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned copied,
- struct page *page)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- u64 to = pos + copied;
- void *kaddr;
- unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
-
- BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
-
- kaddr = kmap_atomic(page);
- memcpy(buf + pos, kaddr + pos, copied);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
-
- WARN_ON(!PageUptodate(page));
- unlock_page(page);
- put_page(page);
-
- if (copied) {
- if (inode->i_size < to)
- i_size_write(inode, to);
- mark_inode_dirty(inode);
- }
- return copied;
-}
-
-/**
* jdata_set_page_dirty - Page dirtying function
* @page: The page to dirty
*
@@ -759,7 +697,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
return 0;
if (!gfs2_is_stuffed(ip))
- dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
+ dblock = iomap_bmap(mapping, lblock, &gfs2_iomap_ops);
gfs2_glock_dq_uninit(&i_gh);
@@ -888,7 +826,7 @@ cannot_release:
return 0;
}
-static const struct address_space_operations gfs2_writeback_aops = {
+static const struct address_space_operations gfs2_aops = {
.writepage = gfs2_writepage,
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
@@ -902,21 +840,6 @@ static const struct address_space_operations gfs2_writeback_aops = {
.error_remove_page = generic_error_remove_page,
};
-static const struct address_space_operations gfs2_ordered_aops = {
- .writepage = gfs2_writepage,
- .writepages = gfs2_writepages,
- .readpage = gfs2_readpage,
- .readpages = gfs2_readpages,
- .set_page_dirty = __set_page_dirty_buffers,
- .bmap = gfs2_bmap,
- .invalidatepage = gfs2_invalidatepage,
- .releasepage = gfs2_releasepage,
- .direct_IO = noop_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
-};
-
static const struct address_space_operations gfs2_jdata_aops = {
.writepage = gfs2_jdata_writepage,
.writepages = gfs2_jdata_writepages,
@@ -932,15 +855,8 @@ static const struct address_space_operations gfs2_jdata_aops = {
void gfs2_set_aops(struct inode *inode)
{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
-
- if (gfs2_is_jdata(ip))
+ if (gfs2_is_jdata(GFS2_I(inode)))
inode->i_mapping->a_ops = &gfs2_jdata_aops;
- else if (gfs2_is_writeback(sdp))
- inode->i_mapping->a_ops = &gfs2_writeback_aops;
- else if (gfs2_is_ordered(sdp))
- inode->i_mapping->a_ops = &gfs2_ordered_aops;
else
- BUG();
+ inode->i_mapping->a_ops = &gfs2_aops;
}
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index fa8e5d0144dd..ff9877a68780 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -8,10 +8,6 @@
#include "incore.h"
-extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page);
-extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned copied,
- struct page *page);
extern void adjust_fs_space(struct inode *inode);
extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
unsigned int from, unsigned int len);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 93ea1d529aa3..79581b9bdebb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -595,7 +595,6 @@ enum alloc_state {
* gfs2_iomap_alloc - Build a metadata tree of the requested height
* @inode: The GFS2 inode
* @iomap: The iomap structure
- * @flags: iomap flags
* @mp: The metapath, with proper height information calculated
*
* In this routine we may have to alloc:
@@ -622,7 +621,7 @@ enum alloc_state {
*/
static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
- unsigned flags, struct metapath *mp)
+ struct metapath *mp)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1088,7 +1087,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
}
if (iomap->type == IOMAP_HOLE) {
- ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
+ ret = gfs2_iomap_alloc(inode, iomap, mp);
if (ret) {
gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
@@ -1182,6 +1181,8 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
if (ip->i_qadata && ip->i_qadata->qa_qd_num)
gfs2_quota_unlock(ip);
+ if (iomap->flags & IOMAP_F_SIZE_CHANGED)
+ mark_inode_dirty(inode);
gfs2_write_unlock(inode);
out:
@@ -1232,7 +1233,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
if (create) {
ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
if (!ret && iomap.type == IOMAP_HOLE)
- ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
+ ret = gfs2_iomap_alloc(inode, &iomap, &mp);
release_metapath(&mp);
} else {
ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
@@ -1462,7 +1463,7 @@ int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
if (!ret && iomap->type == IOMAP_HOLE)
- ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
+ ret = gfs2_iomap_alloc(inode, iomap, &mp);
release_metapath(&mp);
return ret;
}
@@ -1862,9 +1863,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
gfs2_assert_withdraw(sdp, bh);
if (gfs2_assert_withdraw(sdp,
prev_bnr != bh->b_blocknr)) {
- printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
- "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
- sdp->sd_fsname,
+ fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
+ "s_h:%u, mp_h:%u\n",
(unsigned long long)ip->i_no_addr,
prev_bnr, ip->i_height, strip_h, mp_h);
}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 88e4f955c518..6f35d19eec25 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -750,7 +750,7 @@ static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode,
struct gfs2_dirent *dent;
dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
gfs2_dirent_find_offset, name, ptr);
- if (!dent || IS_ERR(dent))
+ if (IS_ERR_OR_NULL(dent))
return dent;
return do_init_dirent(inode, dent, name, bh,
(unsigned)(ptr - (void *)dent));
@@ -854,7 +854,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
return ERR_PTR(error);
dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
got_dent:
- if (unlikely(dent == NULL || IS_ERR(dent))) {
+ if (IS_ERR_OR_NULL(dent)) {
brelse(bh);
bh = NULL;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d174b1f8fd08..52fa1ef8400b 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -136,27 +136,36 @@ static struct {
{FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA},
};
+static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags)
+{
+ int i;
+ u32 fsflags = 0;
+
+ if (S_ISDIR(inode->i_mode))
+ gfsflags &= ~GFS2_DIF_JDATA;
+ else
+ gfsflags &= ~GFS2_DIF_INHERIT_JDATA;
+
+ for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++)
+ if (gfsflags & fsflag_gfs2flag[i].gfsflag)
+ fsflags |= fsflag_gfs2flag[i].fsflag;
+ return fsflags;
+}
+
static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
{
struct inode *inode = file_inode(filp);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
- int i, error;
- u32 gfsflags, fsflags = 0;
+ int error;
+ u32 fsflags;
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
error = gfs2_glock_nq(&gh);
if (error)
goto out_uninit;
- gfsflags = ip->i_diskflags;
- if (S_ISDIR(inode->i_mode))
- gfsflags &= ~GFS2_DIF_JDATA;
- else
- gfsflags &= ~GFS2_DIF_INHERIT_JDATA;
- for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++)
- if (gfsflags & fsflag_gfs2flag[i].gfsflag)
- fsflags |= fsflag_gfs2flag[i].fsflag;
+ fsflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);
if (put_user(fsflags, ptr))
error = -EFAULT;
@@ -200,9 +209,11 @@ void gfs2_set_inode_flags(struct inode *inode)
* @filp: file pointer
* @reqflags: The flags to set
* @mask: Indicates which flags are valid
+ * @fsflags: The FS_* inode flags passed in
*
*/
-static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
+static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask,
+ const u32 fsflags)
{
struct inode *inode = file_inode(filp);
struct gfs2_inode *ip = GFS2_I(inode);
@@ -210,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
struct buffer_head *bh;
struct gfs2_holder gh;
int error;
- u32 new_flags, flags;
+ u32 new_flags, flags, oldflags;
error = mnt_want_write_file(filp);
if (error)
@@ -220,6 +231,11 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
if (error)
goto out_drop_write;
+ oldflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);
+ error = vfs_ioc_setflags_prepare(inode, oldflags, fsflags);
+ if (error)
+ goto out;
+
error = -EACCES;
if (!inode_owner_or_capable(inode))
goto out;
@@ -308,7 +324,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA);
}
- return do_gfs2_set_flags(filp, gfsflags, mask);
+ return do_gfs2_set_flags(filp, gfsflags, mask, fsflags);
}
static int gfs2_getlabel(struct file *filp, char __user *label)
@@ -363,31 +379,30 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
}
/**
- * gfs2_allocate_page_backing - Use bmap to allocate blocks
+ * gfs2_allocate_page_backing - Allocate blocks for a write fault
* @page: The (locked) page to allocate backing for
*
- * We try to allocate all the blocks required for the page in
- * one go. This might fail for various reasons, so we keep
- * trying until all the blocks to back this page are allocated.
- * If some of the blocks are already allocated, thats ok too.
+ * We try to allocate all the blocks required for the page in one go. This
+ * might fail for various reasons, so we keep trying until all the blocks to
+ * back this page are allocated. If some of the blocks are already allocated,
+ * that is ok too.
*/
-
static int gfs2_allocate_page_backing(struct page *page)
{
- struct inode *inode = page->mapping->host;
- struct buffer_head bh;
- unsigned long size = PAGE_SIZE;
- u64 lblock = page->index << (PAGE_SHIFT - inode->i_blkbits);
+ u64 pos = page_offset(page);
+ u64 size = PAGE_SIZE;
do {
- bh.b_state = 0;
- bh.b_size = size;
- gfs2_block_map(inode, lblock, &bh, 1);
- if (!buffer_mapped(&bh))
+ struct iomap iomap = { };
+
+ if (gfs2_iomap_get_alloc(page->mapping->host, pos, 1, &iomap))
return -EIO;
- size -= bh.b_size;
- lblock += (bh.b_size >> inode->i_blkbits);
- } while(size > 0);
+
+ iomap.length = min(iomap.length, size);
+ size -= iomap.length;
+ pos += iomap.length;
+ } while (size > 0);
+
return 0;
}
@@ -408,7 +423,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned long last_index;
- u64 pos = page->index << PAGE_SHIFT;
+ u64 pos = page_offset(page);
unsigned int data_blocks, ind_blocks, rblocks;
struct gfs2_holder gh;
loff_t size;
@@ -1166,7 +1181,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
cmd = F_SETLK;
fl->fl_type = F_UNLCK;
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) {
if (fl->fl_type == F_UNLCK)
locks_lock_file_wait(file, fl);
return -EIO;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f1ebcb42cbf5..e23fb8b7b020 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -544,7 +544,7 @@ __acquires(&gl->gl_lockref.lock)
unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
int ret;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) &&
target != LM_ST_UNLOCKED)
return;
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
@@ -581,7 +581,7 @@ __acquires(&gl->gl_lockref.lock)
}
else if (ret) {
fs_err(sdp, "lm_lock ret %d\n", ret);
- GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN,
+ GLOCK_BUG_ON(gl, !test_bit(SDF_WITHDRAWN,
&sdp->sd_flags));
}
} else { /* lock_nolock */
@@ -681,7 +681,7 @@ static void delete_work_func(struct work_struct *work)
goto out;
inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
- if (inode && !IS_ERR(inode)) {
+ if (!IS_ERR_OR_NULL(inode)) {
d_prune_aliases(inode);
iput(inode);
}
@@ -1075,7 +1075,7 @@ trap_recursive:
fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
fs_err(sdp, "lock type: %d req lock state : %d\n",
gh->gh_gl->gl_name.ln_type, gh->gh_state);
- gfs2_dump_glock(NULL, gl);
+ gfs2_dump_glock(NULL, gl, true);
BUG();
}
@@ -1094,7 +1094,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
int error = 0;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
if (test_bit(GLF_LRU, &gl->gl_flags))
@@ -1610,16 +1610,16 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
glock_hash_walk(thaw_glock, sdp);
}
-static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
spin_lock(&gl->gl_lockref.lock);
- gfs2_dump_glock(seq, gl);
+ gfs2_dump_glock(seq, gl, fsid);
spin_unlock(&gl->gl_lockref.lock);
}
static void dump_glock_func(struct gfs2_glock *gl)
{
- dump_glock(NULL, gl);
+ dump_glock(NULL, gl, true);
}
/**
@@ -1704,10 +1704,12 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
* dump_holder - print information about a glock holder
* @seq: the seq_file struct
* @gh: the glock holder
+ * @fs_id_buf: pointer to file system id (if requested)
*
*/
-static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
+static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
+ const char *fs_id_buf)
{
struct task_struct *gh_owner = NULL;
char flags_buf[32];
@@ -1715,8 +1717,8 @@ static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
rcu_read_lock();
if (gh->gh_owner_pid)
gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
- gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
- state2str(gh->gh_state),
+ gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
+ fs_id_buf, state2str(gh->gh_state),
hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
gh->gh_error,
gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
@@ -1766,6 +1768,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
* gfs2_dump_glock - print information about a glock
* @seq: The seq_file struct
* @gl: the glock
+ * @fsid: If true, also dump the file system id
*
* The file format is as follows:
* One line per object, capital letters are used to indicate objects
@@ -1779,19 +1782,24 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*
*/
-void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned long long dtime;
const struct gfs2_holder *gh;
char gflags_buf[32];
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ memset(fs_id_buf, 0, sizeof(fs_id_buf));
+ if (fsid && sdp) /* safety precaution */
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
dtime = jiffies - gl->gl_demote_time;
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
- state2str(gl->gl_state),
+ gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
+ "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
gflags2str(gflags_buf, gl),
@@ -1802,10 +1810,10 @@ void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
(int)gl->gl_lockref.count, gl->gl_hold_time);
list_for_each_entry(gh, &gl->gl_holders, gh_list)
- dump_holder(seq, gh);
+ dump_holder(seq, gh, fs_id_buf);
if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
- glops->go_dump(seq, gl);
+ glops->go_dump(seq, gl, fs_id_buf);
}
static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2006,7 +2014,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
{
- dump_glock(seq, iter_ptr);
+ dump_glock(seq, iter_ptr, false);
return 0;
}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 149d7f6af085..e4e0bed5257c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -199,8 +199,11 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
struct gfs2_holder *gh);
extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl);
-#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0)
+extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
+ bool fsid);
+#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \
+ gfs2_dump_glock(NULL, gl, true); \
+ BUG(); } } while(0)
extern __printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
@@ -266,7 +269,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object)
{
spin_lock(&gl->gl_lockref.lock);
if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
- gfs2_dump_glock(NULL, gl);
+ gfs2_dump_glock(NULL, gl, true);
gl->gl_object = object;
spin_unlock(&gl->gl_lockref.lock);
}
@@ -278,7 +281,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object)
*
* I'd love to similarly add this:
* else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
- * gfs2_dump_glock(NULL, gl);
+ * gfs2_dump_glock(NULL, gl, true);
* Unfortunately, that's not possible because as soon as gfs2_delete_inode
* frees the block in the rgrp, another process can reassign it for an I_NEW
* inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index cf4c767005b1..ff213690e364 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -461,10 +461,12 @@ static int inode_go_lock(struct gfs2_holder *gh)
* inode_go_dump - print information about an inode
* @seq: The iterator
* @ip: the inode
+ * @fs_id_buf: file system id (may be empty)
*
*/
-static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl)
+static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf)
{
struct gfs2_inode *ip = gl->gl_object;
struct inode *inode = &ip->i_inode;
@@ -477,7 +479,8 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl)
nrpages = inode->i_data.nrpages;
xa_unlock_irq(&inode->i_data.i_pages);
- gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu p:%lu\n",
+ gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu "
+ "p:%lu\n", fs_id_buf,
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
IF2DT(ip->i_inode.i_mode), ip->i_flags,
@@ -503,7 +506,8 @@ static void freeze_go_sync(struct gfs2_glock *gl)
atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
error = freeze_super(sdp->sd_vfs);
if (error) {
- printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error);
+ fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
+ error);
gfs2_assert_withdraw(sdp, 0);
}
queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
@@ -536,7 +540,7 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
gfs2_consist(sdp);
/* Initialize some head of the log stuff */
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
sdp->sd_log_sequence = head.lh_sequence + 1;
gfs2_log_pointers_init(sdp, head.lh_blkno);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c9af93ac6c73..7a993d7c022e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -240,7 +240,8 @@ struct gfs2_glock_operations {
int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
- void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl);
+ void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
const int go_type;
const unsigned long go_flags;
@@ -504,7 +505,6 @@ struct gfs2_trans {
unsigned int tr_num_buf_rm;
unsigned int tr_num_databuf_rm;
unsigned int tr_num_revoke;
- unsigned int tr_num_revoke_rm;
struct list_head tr_list;
struct list_head tr_databuf;
@@ -609,7 +609,7 @@ struct gfs2_tune {
enum {
SDF_JOURNAL_CHECKED = 0,
SDF_JOURNAL_LIVE = 1,
- SDF_SHUTDOWN = 2,
+ SDF_WITHDRAWN = 2,
SDF_NOBARRIERS = 3,
SDF_NORECOVERY = 4,
SDF_DEMOTE = 5,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b296c59832a7..2e2a8a2fb51d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -793,7 +793,7 @@ fail_free_acls:
fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(ghs);
- if (inode && !IS_ERR(inode)) {
+ if (!IS_ERR_OR_NULL(inode)) {
clear_nlink(inode);
if (!free_vfs_inode)
mark_inode_dirty(inode);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index c4c9700c366e..58e237fba565 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -882,7 +882,6 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
old->tr_num_buf_rm += new->tr_num_buf_rm;
old->tr_num_databuf_rm += new->tr_num_databuf_rm;
old->tr_num_revoke += new->tr_num_revoke;
- old->tr_num_revoke_rm += new->tr_num_revoke_rm;
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
@@ -904,7 +903,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
set_bit(TR_ATTACHED, &tr->tr_flags);
}
- sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
+ sdp->sd_log_commited_revoke += tr->tr_num_revoke;
reserved = calc_reserved(sdp);
maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
gfs2_assert_withdraw(sdp, maxres >= reserved);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 1921cda034fd..5b17979af539 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -759,9 +759,27 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
if (gfs2_meta_check(sdp, bh_ip))
error = -EIO;
- else
+ else {
+ struct gfs2_meta_header *mh =
+ (struct gfs2_meta_header *)bh_ip->b_data;
+
+ if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG)) {
+ struct gfs2_rgrpd *rgd;
+
+ rgd = gfs2_blk2rgrpd(sdp, blkno, false);
+ if (rgd && rgd->rd_addr == blkno &&
+ rgd->rd_bits && rgd->rd_bits->bi_bh) {
+ fs_info(sdp, "Replaying 0x%llx but we "
+ "already have a bh!\n",
+ (unsigned long long)blkno);
+ fs_info(sdp, "busy:%d, pinned:%d\n",
+ buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
+ buffer_pinned(rgd->rd_bits->bi_bh));
+ gfs2_dump_glock(NULL, rgd->rd_gl, true);
+ }
+ }
mark_buffer_dirty(bh_ip);
-
+ }
brelse(bh_log);
brelse(bh_ip);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 456763e18def..662ef36c1874 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -251,7 +251,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head *bh, *bhs[2];
int num = 0;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) {
*bhp = NULL;
return -EIO;
}
@@ -309,7 +309,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
wait_on_buffer(bh);
@@ -320,7 +320,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
gfs2_io_error_bh_wd(sdp, bh);
return -EIO;
}
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return -EIO;
return 0;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 08823bb3b2d0..4a8e5a7310f0 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -61,6 +61,13 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_complain_secs = 10;
}
+void free_sbd(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_lkstats)
+ free_percpu(sdp->sd_lkstats);
+ kfree(sdp);
+}
+
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
@@ -72,10 +79,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
sdp->sd_vfs = sb;
sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
- if (!sdp->sd_lkstats) {
- kfree(sdp);
- return NULL;
- }
+ if (!sdp->sd_lkstats)
+ goto fail;
sb->s_fs_info = sdp;
set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
@@ -134,8 +139,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
mutex_init(&sdp->sd_freeze_mutex);
return sdp;
-}
+fail:
+ free_sbd(sdp);
+ return NULL;
+}
/**
* gfs2_check_sb - Check superblock
@@ -568,7 +576,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
INIT_WORK(&jd->jd_work, gfs2_recover_func);
jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
- if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
+ if (IS_ERR_OR_NULL(jd->jd_inode)) {
if (!jd->jd_inode)
error = -ENOENT;
else
@@ -996,7 +1004,7 @@ hostdata_error:
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
{
const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+ if (likely(!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) &&
lm->lm_unmount)
lm->lm_unmount(sdp);
}
@@ -1086,8 +1094,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (error) {
/* In this case, we haven't initialized sysfs, so we have to
manually free the sdp. */
- free_percpu(sdp->sd_lkstats);
- kfree(sdp);
+ free_sbd(sdp);
sb->s_fs_info = NULL;
return error;
}
@@ -1190,7 +1197,6 @@ fail_lm:
gfs2_lm_unmount(sdp);
fail_debug:
gfs2_delete_debugfs_file(sdp);
- free_percpu(sdp->sd_lkstats);
/* gfs2_sys_fs_del must be the last thing we do, since it causes
* sysfs to call function gfs2_sbd_release, which frees sdp. */
gfs2_sys_fs_del(sdp);
@@ -1370,7 +1376,6 @@ static void gfs2_kill_sb(struct super_block *sb)
sdp->sd_root_dir = NULL;
sdp->sd_master_dir = NULL;
shrink_dcache_sb(sb);
- free_percpu(sdp->sd_lkstats);
kill_block_super(sb);
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 8189b581236d..69c4b77f127b 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1475,7 +1475,7 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
{
if (error == 0 || error == -EROFS)
return;
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
sdp->sd_log_error = error;
wake_up(&sdp->sd_logd_waitq);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 2299a3fa1911..c529f8749a89 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -388,7 +388,8 @@ void gfs2_recover_func(struct work_struct *work)
}
t_tlck = ktime_get();
- fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
+ fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
+ jd->jd_jid, head.lh_tail, head.lh_blkno);
for (pass = 0; pass < 2; pass++) {
lops_before_scan(jd, &head, pass);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 36f20a89d0c2..49ac0a5e74ea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -610,11 +610,12 @@ int gfs2_rsqa_alloc(struct gfs2_inode *ip)
return gfs2_qa_alloc(ip);
}
-static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
+static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
+ const char *fs_id_buf)
{
struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
- gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n",
+ gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf,
(unsigned long long)ip->i_no_addr,
(unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
rs->rs_rbm.offset, rs->rs_free);
@@ -1111,32 +1112,33 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
+ struct gfs2_sbd *sdp = rgd->rd_sbd;
int valid = 1;
if (rgl->rl_flags != str->rg_flags) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
+ fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
valid = 0;
}
if (rgl->rl_free != str->rg_free) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
- be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
valid = 0;
}
if (rgl->rl_dinodes != str->rg_dinodes) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
- (unsigned long long)rgd->rd_addr,
- be32_to_cpu(rgl->rl_dinodes),
- be32_to_cpu(str->rg_dinodes));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_dinodes),
+ be32_to_cpu(str->rg_dinodes));
valid = 0;
}
if (rgl->rl_igeneration != str->rg_igeneration) {
- printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch "
- "%llu/%llu", (unsigned long long)rgd->rd_addr,
- (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
- (unsigned long long)be64_to_cpu(str->rg_igeneration));
+ fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu",
+ (unsigned long long)rgd->rd_addr,
+ (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
+ (unsigned long long)be64_to_cpu(str->rg_igeneration));
valid = 0;
}
return valid;
@@ -2246,10 +2248,12 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
* gfs2_rgrp_dump - print out an rgrp
* @seq: The iterator
* @gl: The glock in question
+ * @fs_id_buf: pointer to file system id (if requested)
*
*/
-void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
+void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf)
{
struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_blkreserv *trs;
@@ -2257,14 +2261,15 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
if (rgd == NULL)
return;
- gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+ gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+ fs_id_buf,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
rgd->rd_reserved, rgd->rd_extfail_pt);
if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
- gfs2_print_dbg(seq, " L: f:%02x b:%u i:%u\n",
+ gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf,
be32_to_cpu(rgl->rl_flags),
be32_to_cpu(rgl->rl_free),
be32_to_cpu(rgl->rl_dinodes));
@@ -2272,7 +2277,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
spin_lock(&rgd->rd_rsspin);
for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
- dump_rs(seq, trs);
+ dump_rs(seq, trs, fs_id_buf);
}
spin_unlock(&rgd->rd_rsspin);
}
@@ -2280,10 +2285,13 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl)
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
+
fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
(unsigned long long)rgd->rd_addr);
fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
- gfs2_rgrp_dump(NULL, rgd->rd_gl);
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
+ gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
rgd->rd_flags |= GFS2_RDF_ERROR;
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 6a3adf0ee0b7..c14a673ae36f 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -69,7 +69,8 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
-extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl);
+extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf);
extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
struct buffer_head *bh,
const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b70cea5c8c59..0acc5834f653 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -394,6 +394,7 @@ static int init_threads(struct gfs2_sbd *sdp)
fail:
kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
return error;
}
@@ -451,8 +452,12 @@ fail:
freeze_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&freeze_gh);
fail_threads:
- kthread_stop(sdp->sd_quotad_process);
- kthread_stop(sdp->sd_logd_process);
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
return error;
}
@@ -800,7 +805,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
if (!(flags & I_DIRTY_INODE))
return;
- if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)))
return;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
@@ -849,12 +854,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE,
&freeze_gh);
- if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (error && !test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
return error;
flush_workqueue(gfs2_delete_workqueue);
- kthread_stop(sdp->sd_quotad_process);
- kthread_stop(sdp->sd_logd_process);
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
gfs2_quota_sync(sdp->sd_vfs, 0);
gfs2_statfs_sync(sdp->sd_vfs, 0);
@@ -969,14 +978,14 @@ void gfs2_freeze_func(struct work_struct *work)
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
&freeze_gh);
if (error) {
- printk(KERN_INFO "GFS2: couldn't get freeze lock : %d\n", error);
+ fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error);
gfs2_assert_withdraw(sdp, 0);
} else {
atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
error = thaw_super(sb);
if (error) {
- printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n",
- error);
+ fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
+ error);
gfs2_assert_withdraw(sdp, 0);
}
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
@@ -1004,7 +1013,7 @@ static int gfs2_freeze(struct super_block *sb)
if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
goto out;
- if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ if (test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
error = -EINVAL;
goto out;
}
@@ -1014,20 +1023,14 @@ static int gfs2_freeze(struct super_block *sb)
if (!error)
break;
- switch (error) {
- case -EBUSY:
+ if (error == -EBUSY)
fs_err(sdp, "waiting for recovery before freeze\n");
- break;
-
- default:
+ else
fs_err(sdp, "error freezing FS: %d\n", error);
- break;
- }
fs_err(sdp, "retrying...\n");
msleep(1000);
}
- error = 0;
set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
out:
mutex_unlock(&sdp->sd_freeze_mutex);
@@ -1273,8 +1276,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
error = gfs2_make_fs_ro(sdp);
else
error = gfs2_make_fs_rw(sdp);
- if (error)
- return error;
}
sdp->sd_args = args;
@@ -1300,7 +1301,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
spin_unlock(&gt->gt_spin);
gfs2_online_uevent(sdp);
- return 0;
+ return error;
}
/**
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index c5f42f0c503b..9d49eaadb9d9 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -44,6 +44,8 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern void gfs2_freeze_func(struct work_struct *work);
+extern void free_sbd(struct gfs2_sbd *sdp);
+
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 159aedf63c2a..dd15b8e4af2c 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -118,7 +118,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
{
- unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
+ unsigned int b = test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
return snprintf(buf, PAGE_SIZE, "%u\n", b);
}
@@ -296,17 +296,18 @@ static struct attribute *gfs2_attrs[] = {
&gfs2_attr_demote_rq.attr,
NULL,
};
+ATTRIBUTE_GROUPS(gfs2);
static void gfs2_sbd_release(struct kobject *kobj)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
- kfree(sdp);
+ free_sbd(sdp);
}
static struct kobj_type gfs2_ktype = {
.release = gfs2_sbd_release,
- .default_attrs = gfs2_attrs,
+ .default_groups = gfs2_groups,
.sysfs_ops = &gfs2_attr_ops,
};
@@ -679,7 +680,6 @@ fail_lock_module:
fail_tune:
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
fail_reg:
- free_percpu(sdp->sd_lkstats);
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
sb->s_fs_info = NULL;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 6f67ef7aa412..35e3059255fe 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -77,10 +77,10 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr)
fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n",
tr->tr_blocks, tr->tr_revokes, tr->tr_reserved,
test_bit(TR_TOUCHED, &tr->tr_flags));
- fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
+ fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n",
tr->tr_num_buf_new, tr->tr_num_buf_rm,
tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
- tr->tr_num_revoke, tr->tr_num_revoke_rm);
+ tr->tr_num_revoke);
}
void gfs2_trans_end(struct gfs2_sbd *sdp)
@@ -263,7 +263,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
sdp->sd_log_num_revoke--;
kmem_cache_free(gfs2_bufdata_cachep, bd);
- tr->tr_num_revoke_rm++;
+ tr->tr_num_revoke--;
if (--n == 0)
break;
}
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index a7e55234211f..83f6c582773a 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -41,7 +41,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
struct va_format vaf;
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
- test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags))
return 0;
if (fmt) {
@@ -178,9 +178,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
const char *function, char *file, unsigned int line)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
int rv;
- gfs2_rgrp_dump(NULL, rgd->rd_gl);
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
+ gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
rv = gfs2_lm_withdraw(sdp,
"fatal: filesystem consistency error\n"
" RG = %llu\n"
@@ -256,7 +258,7 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *function, char *file, unsigned int line,
bool withdraw)
{
- if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
fs_err(sdp,
"fatal: I/O error\n"
" block = %llu\n"
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5e6502ef7415..ce15b9496b77 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -57,9 +57,8 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
return 0;
}
-static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
+static inline unsigned int hfsplus_getflags(struct inode *inode)
{
- struct inode *inode = file_inode(file);
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
unsigned int flags = 0;
@@ -69,6 +68,13 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
flags |= FS_APPEND_FL;
if (hip->userflags & HFSPLUS_FLG_NODUMP)
flags |= FS_NODUMP_FL;
+ return flags;
+}
+
+static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
+{
+ struct inode *inode = file_inode(file);
+ unsigned int flags = hfsplus_getflags(inode);
return put_user(flags, user_flags);
}
@@ -78,6 +84,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
struct inode *inode = file_inode(file);
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
unsigned int flags, new_fl = 0;
+ unsigned int oldflags = hfsplus_getflags(inode);
int err = 0;
err = mnt_want_write_file(file);
@@ -96,13 +103,9 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
inode_lock(inode);
- if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
- inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- err = -EPERM;
- goto out_unlock_inode;
- }
- }
+ err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (err)
+ goto out_unlock_inode;
/* don't silently ignore unsupported ext2 flags */
if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
diff --git a/fs/inode.c b/fs/inode.c
index 2bf21e2c90fc..0f1e3b563c47 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1899,6 +1899,26 @@ int file_update_time(struct file *file)
}
EXPORT_SYMBOL(file_update_time);
+/* Caller must hold the file's inode lock */
+int file_modified(struct file *file)
+{
+ int err;
+
+ /*
+ * Clear the security bits if the process is not being run by root.
+ * This keeps people from modifying setuid and setgid binaries.
+ */
+ err = file_remove_privs(file);
+ if (err)
+ return err;
+
+ if (unlikely(file->f_mode & FMODE_NOCMTIME))
+ return 0;
+
+ return file_update_time(file);
+}
+EXPORT_SYMBOL(file_modified);
+
int inode_needs_sync(struct inode *inode)
{
if (IS_SYNC(inode))
@@ -2170,3 +2190,89 @@ struct timespec64 current_time(struct inode *inode)
return timespec64_trunc(now, inode->i_sb->s_time_gran);
}
EXPORT_SYMBOL(current_time);
+
+/*
+ * Generic function to check FS_IOC_SETFLAGS values and reject any invalid
+ * configurations.
+ *
+ * Note: the caller should be holding i_mutex, or else be sure that they have
+ * exclusive access to the inode structure.
+ */
+int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
+ unsigned int flags)
+{
+ /*
+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+ * the relevant capability.
+ *
+ * This test looks nicer. Thanks to Pauline Middelink
+ */
+ if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
+
+ return 0;
+}
+EXPORT_SYMBOL(vfs_ioc_setflags_prepare);
+
+/*
+ * Generic function to check FS_IOC_FSSETXATTR values and reject any invalid
+ * configurations.
+ *
+ * Note: the caller should be holding i_mutex, or else be sure that they have
+ * exclusive access to the inode structure.
+ */
+int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa,
+ struct fsxattr *fa)
+{
+ /*
+ * Can't modify an immutable/append-only file unless we have
+ * appropriate permission.
+ */
+ if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
+ (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
+
+ /*
+ * Project Quota ID state is only allowed to change from within the init
+ * namespace. Enforce that restriction only if we are trying to change
+ * the quota ID state. Everything else is allowed in user namespaces.
+ */
+ if (current_user_ns() != &init_user_ns) {
+ if (old_fa->fsx_projid != fa->fsx_projid)
+ return -EINVAL;
+ if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
+ FS_XFLAG_PROJINHERIT)
+ return -EINVAL;
+ }
+
+ /* Check extent size hints. */
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
+ !S_ISDIR(inode->i_mode))
+ return -EINVAL;
+
+ if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
+ !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+ return -EINVAL;
+
+ /*
+ * It is only valid to set the DAX flag on regular files and
+ * directories on filesystems.
+ */
+ if ((fa->fsx_xflags & FS_XFLAG_DAX) &&
+ !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+ return -EINVAL;
+
+ /* Extent size hints of zero turn off the flags. */
+ if (fa->fsx_extsize == 0)
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
+ if (fa->fsx_cowextsize == 0)
+ fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
diff --git a/fs/internal.h b/fs/internal.h
index a48ef81be37d..2f3c3de51fad 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -40,8 +40,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
extern void guard_bio_eod(int rw, struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block, struct iomap *iomap);
-void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page);
/*
* char_dev.c
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4ed4b110a154..d682049c07b2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,6 +231,7 @@ struct io_ring_ctx {
struct task_struct *sqo_thread; /* if using sq thread polling */
struct mm_struct *sqo_mm;
wait_queue_head_t sqo_wait;
+ struct completion sqo_thread_started;
struct {
/* CQ ring */
@@ -322,6 +323,7 @@ struct io_kiocb {
struct io_ring_ctx *ctx;
struct list_head list;
+ struct list_head link_list;
unsigned int flags;
refcount_t refs;
#define REQ_F_NOWAIT 1 /* must not punt to workers */
@@ -330,8 +332,10 @@ struct io_kiocb {
#define REQ_F_SEQ_PREV 8 /* sequential with previous */
#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
#define REQ_F_IO_DRAINED 32 /* drain done */
+#define REQ_F_LINK 64 /* linked sqes */
+#define REQ_F_FAIL_LINK 128 /* fail rest of links */
u64 user_data;
- u32 error; /* iopoll result from callback */
+ u32 result;
u32 sequence;
struct work_struct work;
@@ -395,7 +399,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
if (!ctx)
return NULL;
- if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, 0, GFP_KERNEL)) {
+ if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
kfree(ctx);
return NULL;
}
@@ -403,6 +408,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
ctx->flags = p->flags;
init_waitqueue_head(&ctx->cq_wait);
init_completion(&ctx->ctx_done);
+ init_completion(&ctx->sqo_thread_started);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
for (i = 0; i < ARRAY_SIZE(ctx->pending_async); i++) {
@@ -584,6 +590,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
req->flags = 0;
/* one is dropped after submission, the other at completion */
refcount_set(&req->refs, 2);
+ req->result = 0;
return req;
out:
io_ring_drop_ctx_refs(ctx, 1);
@@ -599,7 +606,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
}
}
-static void io_free_req(struct io_kiocb *req)
+static void __io_free_req(struct io_kiocb *req)
{
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
fput(req->file);
@@ -607,6 +614,63 @@ static void io_free_req(struct io_kiocb *req)
kmem_cache_free(req_cachep, req);
}
+static void io_req_link_next(struct io_kiocb *req)
+{
+ struct io_kiocb *nxt;
+
+ /*
+ * The list should never be empty when we are called here. But could
+ * potentially happen if the chain is messed up, check to be on the
+ * safe side.
+ */
+ nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
+ if (nxt) {
+ list_del(&nxt->list);
+ if (!list_empty(&req->link_list)) {
+ INIT_LIST_HEAD(&nxt->link_list);
+ list_splice(&req->link_list, &nxt->link_list);
+ nxt->flags |= REQ_F_LINK;
+ }
+
+ INIT_WORK(&nxt->work, io_sq_wq_submit_work);
+ queue_work(req->ctx->sqo_wq, &nxt->work);
+ }
+}
+
+/*
+ * Called if REQ_F_LINK is set, and we fail the head request
+ */
+static void io_fail_links(struct io_kiocb *req)
+{
+ struct io_kiocb *link;
+
+ while (!list_empty(&req->link_list)) {
+ link = list_first_entry(&req->link_list, struct io_kiocb, list);
+ list_del(&link->list);
+
+ io_cqring_add_event(req->ctx, link->user_data, -ECANCELED);
+ __io_free_req(link);
+ }
+}
+
+static void io_free_req(struct io_kiocb *req)
+{
+ /*
+ * If LINK is set, we have dependent requests in this chain. If we
+ * didn't fail this request, queue the first one up, moving any other
+ * dependencies to the next request. In case of failure, fail the rest
+ * of the chain.
+ */
+ if (req->flags & REQ_F_LINK) {
+ if (req->flags & REQ_F_FAIL_LINK)
+ io_fail_links(req);
+ else
+ io_req_link_next(req);
+ }
+
+ __io_free_req(req);
+}
+
static void io_put_req(struct io_kiocb *req)
{
if (refcount_dec_and_test(&req->refs))
@@ -628,16 +692,17 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
- io_cqring_fill_event(ctx, req->user_data, req->error);
+ io_cqring_fill_event(ctx, req->user_data, req->result);
(*nr_events)++;
if (refcount_dec_and_test(&req->refs)) {
/* If we're not using fixed files, we have to pair the
* completion part with the file put. Use regular
* completions for those, only batch free for fixed
- * file.
+ * file and non-linked commands.
*/
- if (req->flags & REQ_F_FIXED_FILE) {
+ if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+ REQ_F_FIXED_FILE) {
reqs[to_free++] = req;
if (to_free == ARRAY_SIZE(reqs))
io_free_req_many(ctx, reqs, &to_free);
@@ -776,6 +841,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
kiocb_end_write(kiocb);
+ if ((req->flags & REQ_F_LINK) && res != req->result)
+ req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, req->user_data, res);
io_put_req(req);
}
@@ -786,7 +853,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
kiocb_end_write(kiocb);
- req->error = res;
+ if ((req->flags & REQ_F_LINK) && res != req->result)
+ req->flags |= REQ_F_FAIL_LINK;
+ req->result = res;
if (res != -EAGAIN)
req->flags |= REQ_F_IOPOLL_COMPLETED;
}
@@ -929,7 +998,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
!kiocb->ki_filp->f_op->iopoll)
return -EOPNOTSUPP;
- req->error = 0;
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
} else {
@@ -1001,9 +1069,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
return 0;
}
-static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
- const struct sqe_submit *s, struct iovec **iovec,
- struct iov_iter *iter)
+static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
+ const struct sqe_submit *s, struct iovec **iovec,
+ struct iov_iter *iter)
{
const struct io_uring_sqe *sqe = s->sqe;
void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -1021,7 +1089,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
opcode = READ_ONCE(sqe->opcode);
if (opcode == IORING_OP_READ_FIXED ||
opcode == IORING_OP_WRITE_FIXED) {
- int ret = io_import_fixed(ctx, rw, sqe, iter);
+ ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
*iovec = NULL;
return ret;
}
@@ -1087,7 +1155,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
struct iov_iter iter;
struct file *file;
size_t iov_count;
- int ret;
+ ssize_t read_size, ret;
ret = io_prep_rw(req, s, force_nonblock);
if (ret)
@@ -1100,16 +1168,30 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
return -EINVAL;
ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
- if (ret)
+ if (ret < 0)
return ret;
+ read_size = ret;
+ if (req->flags & REQ_F_LINK)
+ req->result = read_size;
+
iov_count = iov_iter_count(&iter);
ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
- /* Catch -EAGAIN return for forced non-blocking submission */
ret2 = call_read_iter(file, kiocb, &iter);
+ /*
+ * In case of a short read, punt to async. This can happen
+ * if we have data partially cached. Alternatively we can
+ * return the short read, in which case the application will
+ * need to issue another SQE and wait for it. That SQE will
+ * need async punt anyway, so it's more efficient to do it
+ * here.
+ */
+ if (force_nonblock && ret2 > 0 && ret2 < read_size)
+ ret2 = -EAGAIN;
+ /* Catch -EAGAIN return for forced non-blocking submission */
if (!force_nonblock || ret2 != -EAGAIN) {
io_rw_done(kiocb, ret2);
} else {
@@ -1134,7 +1216,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
struct iov_iter iter;
struct file *file;
size_t iov_count;
- int ret;
+ ssize_t ret;
ret = io_prep_rw(req, s, force_nonblock);
if (ret)
@@ -1147,9 +1229,12 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
return -EINVAL;
ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
- if (ret)
+ if (ret < 0)
return ret;
+ if (req->flags & REQ_F_LINK)
+ req->result = ret;
+
iov_count = iov_iter_count(&iter);
ret = -EAGAIN;
@@ -1253,6 +1338,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
end > 0 ? end : LLONG_MAX,
fsync_flags & IORING_FSYNC_DATASYNC);
+ if (ret < 0 && (req->flags & REQ_F_LINK))
+ req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
return 0;
@@ -1297,11 +1384,70 @@ static int io_sync_file_range(struct io_kiocb *req,
ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
+ if (ret < 0 && (req->flags & REQ_F_LINK))
+ req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
return 0;
}
+#if defined(CONFIG_NET)
+static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock,
+ long (*fn)(struct socket *, struct user_msghdr __user *,
+ unsigned int))
+{
+ struct socket *sock;
+ int ret;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+
+ sock = sock_from_file(req->file, &ret);
+ if (sock) {
+ struct user_msghdr __user *msg;
+ unsigned flags;
+
+ flags = READ_ONCE(sqe->msg_flags);
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
+ msg = (struct user_msghdr __user *) (unsigned long)
+ READ_ONCE(sqe->addr);
+
+ ret = fn(sock, msg, flags);
+ if (force_nonblock && ret == -EAGAIN)
+ return ret;
+ }
+
+ io_cqring_add_event(req->ctx, sqe->user_data, ret);
+ io_put_req(req);
+ return 0;
+}
+#endif
+
+static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+ return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+ return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
static void io_poll_remove_one(struct io_kiocb *req)
{
struct io_poll_iocb *poll = &req->poll;
@@ -1549,9 +1695,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
{
int ret, opcode;
+ req->user_data = READ_ONCE(s->sqe->user_data);
+
if (unlikely(s->index >= ctx->sq_entries))
return -EINVAL;
- req->user_data = READ_ONCE(s->sqe->user_data);
opcode = READ_ONCE(s->sqe->opcode);
switch (opcode) {
@@ -1586,6 +1733,12 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_SYNC_FILE_RANGE:
ret = io_sync_file_range(req, s->sqe, force_nonblock);
break;
+ case IORING_OP_SENDMSG:
+ ret = io_sendmsg(req, s->sqe, force_nonblock);
+ break;
+ case IORING_OP_RECVMSG:
+ ret = io_recvmsg(req, s->sqe, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
@@ -1595,7 +1748,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
return ret;
if (ctx->flags & IORING_SETUP_IOPOLL) {
- if (req->error == -EAGAIN)
+ if (req->result == -EAGAIN)
return -EAGAIN;
/* workqueue context doesn't hold uring_lock, grab it now */
@@ -1819,31 +1972,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
return 0;
}
-static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
- struct io_submit_state *state)
+static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ struct sqe_submit *s)
{
- struct io_kiocb *req;
int ret;
- /* enforce forwards compatibility on users */
- if (unlikely(s->sqe->flags & ~(IOSQE_FIXED_FILE | IOSQE_IO_DRAIN)))
- return -EINVAL;
-
- req = io_get_req(ctx, state);
- if (unlikely(!req))
- return -EAGAIN;
-
- ret = io_req_set_file(ctx, s, state, req);
- if (unlikely(ret))
- goto out;
-
- ret = io_req_defer(ctx, req, s->sqe);
- if (ret) {
- if (ret == -EIOCBQUEUED)
- ret = 0;
- return ret;
- }
-
ret = __io_submit_sqe(ctx, req, s, true);
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
struct io_uring_sqe *sqe_copy;
@@ -1866,24 +1999,93 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
/*
* Queued up for async execution, worker will release
- * submit reference when the iocb is actually
- * submitted.
+ * submit reference when the iocb is actually submitted.
*/
return 0;
}
}
-out:
/* drop submission reference */
io_put_req(req);
/* and drop final reference, if we failed */
- if (ret)
+ if (ret) {
+ io_cqring_add_event(ctx, req->user_data, ret);
+ if (req->flags & REQ_F_LINK)
+ req->flags |= REQ_F_FAIL_LINK;
io_put_req(req);
+ }
return ret;
}
+#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
+
+static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
+ struct io_submit_state *state, struct io_kiocb **link)
+{
+ struct io_uring_sqe *sqe_copy;
+ struct io_kiocb *req;
+ int ret;
+
+ /* enforce forwards compatibility on users */
+ if (unlikely(s->sqe->flags & ~SQE_VALID_FLAGS)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ req = io_get_req(ctx, state);
+ if (unlikely(!req)) {
+ ret = -EAGAIN;
+ goto err;
+ }
+
+ ret = io_req_set_file(ctx, s, state, req);
+ if (unlikely(ret)) {
+err_req:
+ io_free_req(req);
+err:
+ io_cqring_add_event(ctx, s->sqe->user_data, ret);
+ return;
+ }
+
+ ret = io_req_defer(ctx, req, s->sqe);
+ if (ret) {
+ if (ret != -EIOCBQUEUED)
+ goto err_req;
+ return;
+ }
+
+ /*
+ * If we already have a head request, queue this one for async
+ * submittal once the head completes. If we don't have a head but
+ * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be
+ * submitted sync once the chain is complete. If none of those
+ * conditions are true (normal request), then just queue it.
+ */
+ if (*link) {
+ struct io_kiocb *prev = *link;
+
+ sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
+ if (!sqe_copy) {
+ ret = -EAGAIN;
+ goto err_req;
+ }
+
+ s->sqe = sqe_copy;
+ memcpy(&req->submit, s, sizeof(*s));
+ list_add_tail(&req->list, &prev->link_list);
+ } else if (s->sqe->flags & IOSQE_IO_LINK) {
+ req->flags |= REQ_F_LINK;
+
+ memcpy(&req->submit, s, sizeof(*s));
+ INIT_LIST_HEAD(&req->link_list);
+ *link = req;
+ } else {
+ io_queue_sqe(ctx, req, s);
+ }
+}
+
/*
* Batched submission is done, ensure local IO is flushed out.
*/
@@ -1966,7 +2168,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
unsigned int nr, bool has_user, bool mm_fault)
{
struct io_submit_state state, *statep = NULL;
- int ret, i, submitted = 0;
+ struct io_kiocb *link = NULL;
+ bool prev_was_link = false;
+ int i, submitted = 0;
if (nr > IO_PLUG_THRESHOLD) {
io_submit_state_start(&state, ctx, nr);
@@ -1974,22 +2178,30 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
}
for (i = 0; i < nr; i++) {
+ /*
+ * If previous wasn't linked and we have a linked command,
+ * that's the end of the chain. Submit the previous link.
+ */
+ if (!prev_was_link && link) {
+ io_queue_sqe(ctx, link, &link->submit);
+ link = NULL;
+ }
+ prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
+
if (unlikely(mm_fault)) {
- ret = -EFAULT;
+ io_cqring_add_event(ctx, sqes[i].sqe->user_data,
+ -EFAULT);
} else {
sqes[i].has_user = has_user;
sqes[i].needs_lock = true;
sqes[i].needs_fixed_file = true;
- ret = io_submit_sqe(ctx, &sqes[i], statep);
- }
- if (!ret) {
+ io_submit_sqe(ctx, &sqes[i], statep, &link);
submitted++;
- continue;
}
-
- io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
}
+ if (link)
+ io_queue_sqe(ctx, link, &link->submit);
if (statep)
io_submit_state_end(&state);
@@ -2006,6 +2218,8 @@ static int io_sq_thread(void *data)
unsigned inflight;
unsigned long timeout;
+ complete(&ctx->sqo_thread_started);
+
old_fs = get_fs();
set_fs(USER_DS);
@@ -2130,6 +2344,8 @@ static int io_sq_thread(void *data)
static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
{
struct io_submit_state state, *statep = NULL;
+ struct io_kiocb *link = NULL;
+ bool prev_was_link = false;
int i, submit = 0;
if (to_submit > IO_PLUG_THRESHOLD) {
@@ -2139,22 +2355,30 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
for (i = 0; i < to_submit; i++) {
struct sqe_submit s;
- int ret;
if (!io_get_sqring(ctx, &s))
break;
+ /*
+ * If previous wasn't linked and we have a linked command,
+ * that's the end of the chain. Submit the previous link.
+ */
+ if (!prev_was_link && link) {
+ io_queue_sqe(ctx, link, &link->submit);
+ link = NULL;
+ }
+ prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
+
s.has_user = true;
s.needs_lock = false;
s.needs_fixed_file = false;
submit++;
-
- ret = io_submit_sqe(ctx, &s, statep);
- if (ret)
- io_cqring_add_event(ctx, s.sqe->user_data, ret);
+ io_submit_sqe(ctx, &s, statep, &link);
}
io_commit_sqring(ctx);
+ if (link)
+ io_queue_sqe(ctx, link, &link->submit);
if (statep)
io_submit_state_end(statep);
@@ -2240,6 +2464,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
static void io_sq_thread_stop(struct io_ring_ctx *ctx)
{
if (ctx->sqo_thread) {
+ wait_for_completion(&ctx->sqo_thread_started);
/*
* The park is a bit of a work-around, without it we get
* warning spews on shutdown with SQPOLL set and affinity
diff --git a/fs/iomap.c b/fs/iomap.c
index 7a147aa0c4d9..217c3e5a13d6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -777,6 +777,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page, struct iomap *iomap)
{
const struct iomap_page_ops *page_ops = iomap->page_ops;
+ loff_t old_size = inode->i_size;
int ret;
if (iomap->type == IOMAP_INLINE) {
@@ -788,9 +789,21 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
}
- __generic_write_end(inode, pos, ret, page);
+ /*
+ * Update the in-memory inode size after copying the data into the page
+ * cache. It's up to the file system to write the updated size to disk,
+ * preferably after I/O completion so that no stale data is exposed.
+ */
+ if (pos + ret > old_size) {
+ i_size_write(inode, pos + ret);
+ iomap->flags |= IOMAP_F_SIZE_CHANGED;
+ }
+ unlock_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
if (page_ops && page_ops->page_done)
- page_ops->page_done(inode, pos, copied, page, iomap);
+ page_ops->page_done(inode, pos, ret, page, iomap);
put_page(page);
if (ret < len)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index efd0ce9489ae..132fb92098c7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -184,17 +184,18 @@ static int journal_wait_on_commit_record(journal_t *journal,
/*
* write the filemap data using writepage() address_space_operations.
* We don't do block allocation here even for delalloc. We don't
- * use writepages() because with dealyed allocation we may be doing
+ * use writepages() because with delayed allocation we may be doing
* block allocation in writepages().
*/
-static int journal_submit_inode_data_buffers(struct address_space *mapping)
+static int journal_submit_inode_data_buffers(struct address_space *mapping,
+ loff_t dirty_start, loff_t dirty_end)
{
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = mapping->nrpages * 2,
- .range_start = 0,
- .range_end = i_size_read(mapping->host),
+ .range_start = dirty_start,
+ .range_end = dirty_end,
};
ret = generic_writepages(mapping, &wbc);
@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+ loff_t dirty_start = jinode->i_dirty_start;
+ loff_t dirty_end = jinode->i_dirty_end;
+
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
mapping = jinode->i_vfs_inode->i_mapping;
@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
* only allocated blocks here.
*/
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
- err = journal_submit_inode_data_buffers(mapping);
+ err = journal_submit_inode_data_buffers(mapping, dirty_start,
+ dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+ loff_t dirty_start = jinode->i_dirty_start;
+ loff_t dirty_end = jinode->i_dirty_end;
+
if (!(jinode->i_flags & JI_WAIT_DATA))
continue;
jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
- err = filemap_fdatawait_keep_errors(
- jinode->i_vfs_inode->i_mapping);
+ err = filemap_fdatawait_range_keep_errors(
+ jinode->i_vfs_inode->i_mapping, dirty_start,
+ dirty_end);
if (!ret)
ret = err;
spin_lock(&journal->j_list_lock);
@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
&jinode->i_transaction->t_inode_list);
} else {
jinode->i_transaction = NULL;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
}
}
spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 43df0c943229..953990eb70a9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_forget);
-#if 0
-EXPORT_SYMBOL(journal_sync_buffer);
-#endif
EXPORT_SYMBOL(jbd2_journal_flush);
EXPORT_SYMBOL(jbd2_journal_revoke);
@@ -94,6 +91,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_inode_add_write);
EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
+EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
@@ -203,7 +202,7 @@ loop:
if (journal->j_flags & JBD2_UNMOUNT)
goto end_loop;
- jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
+ jbd_debug(1, "commit_sequence=%u, commit_request=%u\n",
journal->j_commit_sequence, journal->j_commit_request);
if (journal->j_commit_sequence != journal->j_commit_request) {
@@ -324,7 +323,7 @@ static void journal_kill_thread(journal_t *journal)
* IO is in progress. do_get_write_access() handles this.
*
* The function returns a pointer to the buffer_head to be used for IO.
- *
+ *
*
* Return value:
* <0: Error
@@ -500,7 +499,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
*/
journal->j_commit_request = target;
- jbd_debug(1, "JBD2: requesting commit %d/%d\n",
+ jbd_debug(1, "JBD2: requesting commit %u/%u\n",
journal->j_commit_request,
journal->j_commit_sequence);
journal->j_running_transaction->t_requested = jiffies;
@@ -513,7 +512,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
journal->j_commit_request,
journal->j_commit_sequence,
- target, journal->j_running_transaction ?
+ target, journal->j_running_transaction ?
journal->j_running_transaction->t_tid : 0);
return 0;
}
@@ -698,12 +697,12 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
#ifdef CONFIG_JBD2_DEBUG
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_ERR
- "%s: error: j_commit_request=%d, tid=%d\n",
+ "%s: error: j_commit_request=%u, tid=%u\n",
__func__, journal->j_commit_request, tid);
}
#endif
while (tid_gt(tid, journal->j_commit_sequence)) {
- jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
+ jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
tid, journal->j_commit_sequence);
read_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_commit);
@@ -944,7 +943,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
trace_jbd2_update_log_tail(journal, tid, block, freed);
jbd_debug(1,
- "Cleaning journal tail from %d to %d (offset %lu), "
+ "Cleaning journal tail from %u to %u (offset %lu), "
"freeing %lu\n",
journal->j_tail_sequence, tid, block, freed);
@@ -1318,7 +1317,7 @@ static int journal_reset(journal_t *journal)
*/
if (sb->s_start == 0) {
jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
- "(start %ld, seq %d, errno %d)\n",
+ "(start %ld, seq %u, errno %d)\n",
journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
journal->j_flags |= JBD2_FLUSHED;
@@ -1453,7 +1452,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
return;
}
- jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
+ jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -2574,6 +2573,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
jinode->i_next_transaction = NULL;
jinode->i_vfs_inode = inode;
jinode->i_flags = 0;
+ jinode->i_dirty_start = 0;
+ jinode->i_dirty_end = 0;
INIT_LIST_HEAD(&jinode->i_list);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8ca4fddc705f..990e7b5062e7 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
* File inode in the inode list of the handle's transaction
*/
static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
- unsigned long flags)
+ unsigned long flags, loff_t start_byte, loff_t end_byte)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
@@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid);
- /*
- * First check whether inode isn't already on the transaction's
- * lists without taking the lock. Note that this check is safe
- * without the lock as we cannot race with somebody removing inode
- * from the transaction. The reason is that we remove inode from the
- * transaction only in journal_release_jbd_inode() and when we commit
- * the transaction. We are guarded from the first case by holding
- * a reference to the inode. We are safe against the second case
- * because if jinode->i_transaction == transaction, commit code
- * cannot touch the transaction because we hold reference to it,
- * and if jinode->i_next_transaction == transaction, commit code
- * will only file the inode where we want it.
- */
- if ((jinode->i_transaction == transaction ||
- jinode->i_next_transaction == transaction) &&
- (jinode->i_flags & flags) == flags)
- return 0;
-
spin_lock(&journal->j_list_lock);
jinode->i_flags |= flags;
+
+ if (jinode->i_dirty_end) {
+ jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
+ jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+ } else {
+ jinode->i_dirty_start = start_byte;
+ jinode->i_dirty_end = end_byte;
+ }
+
/* Is inode already attached where we need it? */
if (jinode->i_transaction == transaction ||
jinode->i_next_transaction == transaction)
@@ -2631,12 +2622,28 @@ done:
int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
{
return jbd2_journal_file_inode(handle, jinode,
- JI_WRITE_DATA | JI_WAIT_DATA);
+ JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
}
int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
{
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA);
+ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
+ LLONG_MAX);
+}
+
+int jbd2_journal_inode_ranged_write(handle_t *handle,
+ struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
+{
+ return jbd2_journal_file_inode(handle, jinode,
+ JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
+ start_byte + length - 1);
+}
+
+int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
+ loff_t start_byte, loff_t length)
+{
+ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
+ start_byte, start_byte + length - 1);
}
/*
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 7d8654a1472e..f8fb89b10227 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -109,9 +109,9 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
return ret;
}
-int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
+int jffs2_do_readpage_unlock(void *data, struct page *pg)
{
- int ret = jffs2_do_readpage_nolock(inode, pg);
+ int ret = jffs2_do_readpage_nolock(data, pg);
unlock_page(pg);
return ret;
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 112d85849db1..8a20ddd25f2d 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -687,7 +687,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
struct page *pg;
pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT,
- (void *)jffs2_do_readpage_unlock, inode);
+ jffs2_do_readpage_unlock, inode);
if (IS_ERR(pg))
return (void *)pg;
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index a2dbbb3f4c74..bd3d5f0ddc34 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -155,7 +155,7 @@ extern const struct file_operations jffs2_file_operations;
extern const struct inode_operations jffs2_file_inode_operations;
extern const struct address_space_operations jffs2_file_address_operations;
int jffs2_fsync(struct file *, loff_t, loff_t, int);
-int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
+int jffs2_do_readpage_unlock(void *data, struct page *pg);
/* ioctl.c */
long jffs2_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index ba34dae8bd9f..10ee0ecca1a8 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -98,24 +98,16 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
/* Lock against other parallel changes of flags */
inode_lock(inode);
- oldflags = jfs_inode->mode2;
-
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- */
- if ((oldflags & JFS_IMMUTABLE_FL) ||
- ((flags ^ oldflags) &
- (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- inode_unlock(inode);
- err = -EPERM;
- goto setflags_out;
- }
+ oldflags = jfs_map_ext2(jfs_inode->mode2 & JFS_FL_USER_VISIBLE,
+ 0);
+ err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (err) {
+ inode_unlock(inode);
+ goto setflags_out;
}
flags = flags & JFS_FL_USER_MODIFIABLE;
- flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
+ flags |= jfs_inode->mode2 & ~JFS_FL_USER_MODIFIABLE;
jfs_inode->mode2 = flags;
jfs_set_inode_flags(inode);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 62f98225abb3..b11f2afa84f1 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -47,13 +47,14 @@ void nlmclnt_next_cookie(struct nlm_cookie *c)
c->len=4;
}
-static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
+static struct nlm_lockowner *
+nlmclnt_get_lockowner(struct nlm_lockowner *lockowner)
{
refcount_inc(&lockowner->count);
return lockowner;
}
-static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
+static void nlmclnt_put_lockowner(struct nlm_lockowner *lockowner)
{
if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
return;
@@ -82,28 +83,28 @@ static inline uint32_t __nlm_alloc_pid(struct nlm_host *host)
return res;
}
-static struct nlm_lockowner *__nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner)
+static struct nlm_lockowner *__nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner)
{
struct nlm_lockowner *lockowner;
list_for_each_entry(lockowner, &host->h_lockowners, list) {
if (lockowner->owner != owner)
continue;
- return nlm_get_lockowner(lockowner);
+ return nlmclnt_get_lockowner(lockowner);
}
return NULL;
}
-static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner)
+static struct nlm_lockowner *nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner)
{
struct nlm_lockowner *res, *new = NULL;
spin_lock(&host->h_lock);
- res = __nlm_find_lockowner(host, owner);
+ res = __nlmclnt_find_lockowner(host, owner);
if (res == NULL) {
spin_unlock(&host->h_lock);
new = kmalloc(sizeof(*new), GFP_KERNEL);
spin_lock(&host->h_lock);
- res = __nlm_find_lockowner(host, owner);
+ res = __nlmclnt_find_lockowner(host, owner);
if (res == NULL && new != NULL) {
res = new;
refcount_set(&new->count, 1);
@@ -457,7 +458,7 @@ static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
- new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+ new->fl_u.nfs_fl.owner = nlmclnt_get_lockowner(fl->fl_u.nfs_fl.owner);
list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
}
@@ -467,7 +468,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl)
spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
list_del(&fl->fl_u.nfs_fl.list);
spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
- nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
+ nlmclnt_put_lockowner(fl->fl_u.nfs_fl.owner);
}
static const struct file_lock_operations nlmclnt_lock_ops = {
@@ -478,7 +479,7 @@ static const struct file_lock_operations nlmclnt_lock_ops = {
static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host)
{
fl->fl_u.nfs_fl.state = 0;
- fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+ fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host, fl->fl_owner);
INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
fl->fl_ops = &nlmclnt_lock_ops;
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 1bddf70d9656..e4d3f783e06a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -46,8 +46,14 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
- lock->fl.fl_owner = (fl_owner_t) host;
+ lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
+ nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
+ if (!lock->fl.fl_owner) {
+ /* lockowner allocation has failed */
+ nlmsvc_release_host(host);
+ return nlm_lck_denied_nolocks;
+ }
}
return 0;
@@ -94,6 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -142,6 +149,7 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -178,6 +186,7 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -217,6 +226,7 @@ __nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -365,6 +375,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp)
resp->status = nlmsvc_share_file(host, file, argp);
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -399,6 +410,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp)
resp->status = nlmsvc_unshare_file(host, file, argp);
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ea719cdd6a36..61d3cc2283dc 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -332,6 +332,93 @@ restart:
mutex_unlock(&file->f_mutex);
}
+static struct nlm_lockowner *
+nlmsvc_get_lockowner(struct nlm_lockowner *lockowner)
+{
+ refcount_inc(&lockowner->count);
+ return lockowner;
+}
+
+static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
+{
+ if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
+ return;
+ list_del(&lockowner->list);
+ spin_unlock(&lockowner->host->h_lock);
+ nlmsvc_release_host(lockowner->host);
+ kfree(lockowner);
+}
+
+static struct nlm_lockowner *__nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid)
+{
+ struct nlm_lockowner *lockowner;
+ list_for_each_entry(lockowner, &host->h_lockowners, list) {
+ if (lockowner->pid != pid)
+ continue;
+ return nlmsvc_get_lockowner(lockowner);
+ }
+ return NULL;
+}
+
+static struct nlm_lockowner *nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid)
+{
+ struct nlm_lockowner *res, *new = NULL;
+
+ spin_lock(&host->h_lock);
+ res = __nlmsvc_find_lockowner(host, pid);
+
+ if (res == NULL) {
+ spin_unlock(&host->h_lock);
+ new = kmalloc(sizeof(*res), GFP_KERNEL);
+ spin_lock(&host->h_lock);
+ res = __nlmsvc_find_lockowner(host, pid);
+ if (res == NULL && new != NULL) {
+ res = new;
+ /* fs/locks.c will manage the refcount through lock_ops */
+ refcount_set(&new->count, 1);
+ new->pid = pid;
+ new->host = nlm_get_host(host);
+ list_add(&new->list, &host->h_lockowners);
+ new = NULL;
+ }
+ }
+
+ spin_unlock(&host->h_lock);
+ kfree(new);
+ return res;
+}
+
+void
+nlmsvc_release_lockowner(struct nlm_lock *lock)
+{
+ if (lock->fl.fl_owner)
+ nlmsvc_put_lockowner(lock->fl.fl_owner);
+}
+
+static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+ struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
+ new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
+}
+
+static void nlmsvc_locks_release_private(struct file_lock *fl)
+{
+ nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
+}
+
+static const struct file_lock_operations nlmsvc_lock_ops = {
+ .fl_copy_lock = nlmsvc_locks_copy_lock,
+ .fl_release_private = nlmsvc_locks_release_private,
+};
+
+void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
+ pid_t pid)
+{
+ fl->fl_owner = nlmsvc_find_lockowner(host, pid);
+ if (fl->fl_owner != NULL)
+ fl->fl_ops = &nlmsvc_lock_ops;
+}
+
/*
* Initialize arguments for GRANTED call. The nlm_rqst structure
* has been cleared already.
@@ -345,7 +432,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
/* set default data area */
call->a_args.lock.oh.data = call->a_owner;
- call->a_args.lock.svid = lock->fl.fl_pid;
+ call->a_args.lock.svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
if (lock->oh.len > NLMCLNT_OHSIZE) {
void *data = kmalloc(lock->oh.len, GFP_KERNEL);
@@ -509,6 +596,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
{
int error;
__be32 ret;
+ struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
locks_inode(file->f_file)->i_sb->s_id,
@@ -522,6 +610,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
+ /* If there's a conflicting lock, remember to clean up the test lock */
+ test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
+
error = vfs_test_lock(file->f_file, &lock->fl);
if (error) {
/* We can't currently deal with deferred test requests */
@@ -543,11 +634,16 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
- conflock->svid = lock->fl.fl_pid;
+ conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
locks_release_private(&lock->fl);
+
+ /* Clean up the test lock */
+ lock->fl.fl_owner = NULL;
+ nlmsvc_put_lockowner(test_owner);
+
ret = nlm_lck_denied;
out:
return ret;
@@ -692,25 +788,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
-static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
-{
- return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid;
-}
-
-/*
- * Since NLM uses two "keys" for tracking locks, we need to hash them down
- * to one for the blocked_hash. Here, we're just xor'ing the host address
- * with the pid in order to create a key value for picking a hash bucket.
- */
-static unsigned long
-nlmsvc_owner_key(struct file_lock *fl)
-{
- return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid;
-}
-
const struct lock_manager_operations nlmsvc_lock_operations = {
- .lm_compare_owner = nlmsvc_same_owner,
- .lm_owner_key = nlmsvc_owner_key,
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
};
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index ea77c66d3cc3..d0bb7a6bf005 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -76,8 +76,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
- lock->fl.fl_owner = (fl_owner_t) host;
+ lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
+ nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
+ if (!lock->fl.fl_owner) {
+ /* lockowner allocation has failed */
+ nlmsvc_release_host(host);
+ return nlm_lck_denied_nolocks;
+ }
}
return 0;
@@ -125,6 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
dprintk("lockd: TEST status %d vers %d\n",
ntohl(resp->status), rqstp->rq_vers);
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -173,6 +180,7 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rc;
@@ -210,6 +218,7 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock));
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -250,6 +259,7 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock));
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -408,6 +418,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp)
resp->status = cast_status(nlmsvc_share_file(host, file, argp));
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
@@ -442,6 +453,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp)
resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
+ nlmsvc_release_lockowner(&argp->lock);
nlmsvc_release_host(host);
nlm_release_file(file);
return rpc_success;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0e610f422406..028fc152da22 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -180,7 +180,7 @@ again:
/* update current lock count */
file->f_locks++;
- lockhost = (struct nlm_host *) fl->fl_owner;
+ lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
if (match(lockhost, host)) {
struct file_lock lock = *fl;
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 7147e4aebecc..982629f7b120 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -126,8 +126,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
lock->svid = ntohl(*p++);
locks_init_lock(fl);
- fl->fl_owner = current->files;
- fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
@@ -269,7 +267,6 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
- lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 7ed9edf9aed4..5fa9f48a9dba 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -118,8 +118,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
lock->svid = ntohl(*p++);
locks_init_lock(fl);
- fl->fl_owner = current->files;
- fl->fl_pid = (pid_t)lock->svid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
@@ -266,7 +264,6 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
- lock->fl.fl_pid = (pid_t)lock->svid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
diff --git a/fs/locks.c b/fs/locks.c
index ec1e4a5df629..686eae21daf6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -658,9 +658,6 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
*/
static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
{
- if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner)
- return fl2->fl_lmops == fl1->fl_lmops &&
- fl1->fl_lmops->lm_compare_owner(fl1, fl2);
return fl1->fl_owner == fl2->fl_owner;
}
@@ -701,8 +698,6 @@ static void locks_delete_global_locks(struct file_lock *fl)
static unsigned long
posix_owner_key(struct file_lock *fl)
{
- if (fl->fl_lmops && fl->fl_lmops->lm_owner_key)
- return fl->fl_lmops->lm_owner_key(fl);
return (unsigned long)fl->fl_owner;
}
@@ -1534,11 +1529,21 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
{
- if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT))
- return false;
- if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
- return false;
- return locks_conflict(breaker, lease);
+ bool rc;
+
+ if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
+ rc = false;
+ goto trace;
+ }
+ if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
+ rc = false;
+ goto trace;
+ }
+
+ rc = locks_conflict(breaker, lease);
+trace:
+ trace_leases_conflict(rc, lease, breaker);
+ return rc;
}
static bool
@@ -1753,10 +1758,10 @@ int fcntl_getlease(struct file *filp)
}
/**
- * check_conflicting_open - see if the given dentry points to a file that has
+ * check_conflicting_open - see if the given file points to an inode that has
* an existing open that would conflict with the
* desired lease.
- * @dentry: dentry to check
+ * @filp: file to check
* @arg: type of lease that we're trying to acquire
* @flags: current lock flags
*
@@ -1764,30 +1769,42 @@ int fcntl_getlease(struct file *filp)
* conflict with the lease we're trying to set.
*/
static int
-check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
+check_conflicting_open(struct file *filp, const long arg, int flags)
{
- int ret = 0;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = locks_inode(filp);
+ int self_wcount = 0, self_rcount = 0;
if (flags & FL_LAYOUT)
return 0;
- if ((arg == F_RDLCK) && inode_is_open_for_write(inode))
- return -EAGAIN;
+ if (arg == F_RDLCK)
+ return inode_is_open_for_write(inode) ? -EAGAIN : 0;
+ else if (arg != F_WRLCK)
+ return 0;
- if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
- (atomic_read(&inode->i_count) > 1)))
- ret = -EAGAIN;
+ /*
+ * Make sure that only read/write count is from lease requestor.
+ * Note that this will result in denying write leases when i_writecount
+ * is negative, which is what we want. (We shouldn't grant write leases
+ * on files open for execution.)
+ */
+ if (filp->f_mode & FMODE_WRITE)
+ self_wcount = 1;
+ else if (filp->f_mode & FMODE_READ)
+ self_rcount = 1;
- return ret;
+ if (atomic_read(&inode->i_writecount) != self_wcount ||
+ atomic_read(&inode->i_readcount) != self_rcount)
+ return -EAGAIN;
+
+ return 0;
}
static int
generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
{
struct file_lock *fl, *my_fl = NULL, *lease;
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = locks_inode(filp);
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
@@ -1822,7 +1839,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
- error = check_conflicting_open(dentry, arg, lease->fl_flags);
+ error = check_conflicting_open(filp, arg, lease->fl_flags);
if (error)
goto out;
@@ -1879,7 +1896,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
* precedes these checks.
*/
smp_mb();
- error = check_conflicting_open(dentry, arg, lease->fl_flags);
+ error = check_conflicting_open(filp, arg, lease->fl_flags);
if (error) {
locks_unlink_lock_ctx(lease);
goto out;
diff --git a/fs/namei.c b/fs/namei.c
index 20831c2fbb34..209c51a5226c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3883,6 +3883,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
dentry->d_inode->i_flags |= S_DEAD;
dont_mount(dentry);
detach_mounts(dentry);
+ fsnotify_rmdir(dir, dentry);
out:
inode_unlock(dentry->d_inode);
@@ -3999,6 +4000,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
if (!error) {
dont_mount(dentry);
detach_mounts(dentry);
+ fsnotify_unlink(dir, dentry);
}
}
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index cf42a8b939e3..f4157eb1f69d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -129,10 +129,13 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
}
#ifdef CONFIG_NFS_V4_2
-static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t count, unsigned int flags)
+static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count, unsigned int flags)
{
+ /* Only offload copy if superblock is the same */
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
+ return -EXDEV;
if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY))
return -EOPNOTSUPP;
if (file_inode(file_in) == file_inode(file_out))
@@ -140,6 +143,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
+static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count, unsigned int flags)
+{
+ ssize_t ret;
+
+ ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count,
+ flags);
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(file_in, pos_in, file_out,
+ pos_out, count, flags);
+ return ret;
+}
+
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
{
loff_t ret;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 69679f4f2e6c..1e7296395d71 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -72,25 +72,6 @@ struct idmap {
const struct cred *cred;
};
-static struct key_acl nfs_idmap_key_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .possessor_viewable = true,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
- KEY_OWNER_ACE(KEY_ACE_VIEW),
- }
-};
-
-static struct key_acl nfs_idmap_keyring_acl = {
- .usage = REFCOUNT_INIT(1),
- .nr_ace = 2,
- .aces = {
- KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
- KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
- }
-};
-
static struct user_namespace *idmap_userns(const struct idmap *idmap)
{
if (idmap && idmap->cred)
@@ -227,7 +208,8 @@ int nfs_idmap_init(void)
keyring = keyring_alloc(".id_resolver",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- &nfs_idmap_keyring_acl,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
@@ -305,13 +287,11 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
return ERR_PTR(ret);
if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
- rkey = request_key(&key_type_id_resolver, desc, "",
- &nfs_idmap_key_acl);
+ rkey = request_key(&key_type_id_resolver, desc, "");
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
- desc, NULL, "", 0, idmap,
- &nfs_idmap_key_acl);
+ desc, NULL, "", 0, idmap);
mutex_unlock(&idmap->idmap_mutex);
}
if (!IS_ERR(rkey))
@@ -340,6 +320,8 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
}
rcu_read_lock();
+ rkey->perm |= KEY_USR_VIEW;
+
ret = key_validate(rkey);
if (ret < 0)
goto out_up;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 52d533967485..0effeee28352 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -396,12 +396,6 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
nfs_cancel_async_unlink(dentry);
return;
}
-
- /*
- * vfs_unlink and the like do not issue this when a file is
- * sillyrenamed, so do it here.
- */
- fsnotify_nameremove(dentry, 0);
}
#define SILLYNAME_PREFIX ".nfs"
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 4fb1f72a25fb..66d4c55eb48e 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -121,15 +121,13 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
- struct timespec ts;
int error;
- ts = timespec64_to_timespec(inode->i_mtime);
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
- timespec_compare(&lcp->lc_mtime, &ts) < 0)
- lcp->lc_mtime = timespec64_to_timespec(current_time(inode));
+ timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
+ lcp->lc_mtime = current_time(inode);
iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
- iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime);
+ iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
if (new_size > i_size_read(inode)) {
iattr.ia_valid |= ATTR_SIZE;
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4a98537efb0f..10ec5ecdf117 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -10,6 +10,7 @@
#define NFSCACHE_H
#include <linux/sunrpc/svc.h>
+#include "netns.h"
/*
* Representation of a reply cache entry.
@@ -77,8 +78,8 @@ enum {
/* Checksum this amount of the request */
#define RC_CSUMLEN (256U)
-int nfsd_reply_cache_init(void);
-void nfsd_reply_cache_shutdown(void);
+int nfsd_reply_cache_init(struct nfsd_net *);
+void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
int nfsd_reply_cache_stats_open(struct inode *, struct file *);
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 84831253203d..76bee0a0d308 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -127,24 +127,16 @@ static struct nfsd_fault_inject_op inject_ops[] = {
},
};
-int nfsd_fault_inject_init(void)
+void nfsd_fault_inject_init(void)
{
unsigned int i;
struct nfsd_fault_inject_op *op;
umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
debug_dir = debugfs_create_dir("nfsd", NULL);
- if (!debug_dir)
- goto fail;
for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
op = &inject_ops[i];
- if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
- goto fail;
+ debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd);
}
- return 0;
-
-fail:
- nfsd_fault_inject_cleanup();
- return -ENOMEM;
}
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 7c686a270d60..bdfe5bcb3dcd 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -42,6 +42,11 @@ struct nfsd_net {
bool grace_ended;
time_t boot_time;
+ /* internal mount of the "nfsd" pseudofilesystem: */
+ struct vfsmount *nfsd_mnt;
+
+ struct dentry *nfsd_client_dir;
+
/*
* reclaim_str_hashtbl[] holds known client info from previous reset/reboot
* used in reboot/reset lease grace period processing
@@ -106,6 +111,7 @@ struct nfsd_net {
*/
unsigned int max_connections;
+ u32 clientid_base;
u32 clientid_counter;
u32 clverifier_counter;
@@ -127,6 +133,44 @@ struct nfsd_net {
*/
bool *nfsd_versions;
bool *nfsd4_minorversions;
+
+ /*
+ * Duplicate reply cache
+ */
+ struct nfsd_drc_bucket *drc_hashtbl;
+ struct kmem_cache *drc_slab;
+
+ /* max number of entries allowed in the cache */
+ unsigned int max_drc_entries;
+
+ /* number of significant bits in the hash value */
+ unsigned int maskbits;
+ unsigned int drc_hashsize;
+
+ /*
+ * Stats and other tracking of on the duplicate reply cache.
+ * These fields and the "rc" fields in nfsdstats are modified
+ * with only the per-bucket cache lock, which isn't really safe
+ * and should be fixed if we want the statistics to be
+ * completely accurate.
+ */
+
+ /* total number of entries */
+ atomic_t num_drc_entries;
+
+ /* cache misses due only to checksum comparison failures */
+ unsigned int payload_misses;
+
+ /* amount of memory (in bytes) currently consumed by the DRC */
+ unsigned int drc_mem_usage;
+
+ /* longest hash chain seen */
+ unsigned int longest_chain;
+
+ /* size of cache when we saw the longest hash chain */
+ unsigned int longest_chain_cachesize;
+
+ struct shrinker nfsd_reply_cache_shrinker;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 2961016097ac..d1f285245af8 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -83,7 +83,7 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
new->type = itm->type;
strlcpy(new->name, itm->name, sizeof(new->name));
- strlcpy(new->authname, itm->authname, sizeof(new->name));
+ strlcpy(new->authname, itm->authname, sizeof(new->authname));
}
static void
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1a0cdeb3b875..7857942c5ca6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -42,6 +42,7 @@
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
+#include <linux/string_helpers.h>
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
@@ -99,6 +100,13 @@ enum nfsd4_st_mutex_lock_subclass {
*/
static DECLARE_WAIT_QUEUE_HEAD(close_wq);
+/*
+ * A waitqueue where a writer to clients/#/ctl destroying a client can
+ * wait for cl_rpc_users to drop to 0 and then for the client to be
+ * unhashed.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(expiry_wq);
+
static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
@@ -138,7 +146,7 @@ static __be32 get_client_locked(struct nfs4_client *clp)
if (is_client_expired(clp))
return nfserr_expired;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
return nfs_ok;
}
@@ -170,20 +178,24 @@ static void put_client_renew_locked(struct nfs4_client *clp)
lockdep_assert_held(&nn->client_lock);
- if (!atomic_dec_and_test(&clp->cl_refcount))
+ if (!atomic_dec_and_test(&clp->cl_rpc_users))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
}
static void put_client_renew(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
+ if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
spin_unlock(&nn->client_lock);
}
@@ -694,7 +706,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
idr_preload(GFP_KERNEL);
spin_lock(&cl->cl_lock);
- new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
+ /* Reserving 0 for start of file in nfsdfs "states" file: */
+ new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT);
spin_unlock(&cl->cl_lock);
idr_preload_end();
if (new_id < 0)
@@ -1844,7 +1857,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
- clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+ xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL);
if (clp->cl_name.data == NULL)
goto err_no_name;
clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE,
@@ -1854,10 +1867,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
goto err_no_hashtbl;
for (i = 0; i < OWNER_HASH_SIZE; i++)
INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
- clp->cl_name.len = name.len;
INIT_LIST_HEAD(&clp->cl_sessions);
idr_init(&clp->cl_stateids);
- atomic_set(&clp->cl_refcount, 0);
+ atomic_set(&clp->cl_rpc_users, 0);
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_openowners);
@@ -1879,6 +1891,25 @@ err_no_name:
return NULL;
}
+static void __free_client(struct kref *k)
+{
+ struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref);
+ struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);
+
+ free_svc_cred(&clp->cl_cred);
+ kfree(clp->cl_ownerstr_hashtbl);
+ kfree(clp->cl_name.data);
+ kfree(clp->cl_nii_domain.data);
+ kfree(clp->cl_nii_name.data);
+ idr_destroy(&clp->cl_stateids);
+ kmem_cache_free(client_slab, clp);
+}
+
+static void drop_client(struct nfs4_client *clp)
+{
+ kref_put(&clp->cl_nfsdfs.cl_ref, __free_client);
+}
+
static void
free_client(struct nfs4_client *clp)
{
@@ -1891,11 +1922,12 @@ free_client(struct nfs4_client *clp)
free_session(ses);
}
rpc_destroy_wait_queue(&clp->cl_cb_waitq);
- free_svc_cred(&clp->cl_cred);
- kfree(clp->cl_ownerstr_hashtbl);
- kfree(clp->cl_name.data);
- idr_destroy(&clp->cl_stateids);
- kmem_cache_free(client_slab, clp);
+ if (clp->cl_nfsd_dentry) {
+ nfsd_client_rmdir(clp->cl_nfsd_dentry);
+ clp->cl_nfsd_dentry = NULL;
+ wake_up_all(&expiry_wq);
+ }
+ drop_client(clp);
}
/* must be called under the client_lock */
@@ -1936,7 +1968,7 @@ unhash_client(struct nfs4_client *clp)
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
- if (atomic_read(&clp->cl_refcount))
+ if (atomic_read(&clp->cl_rpc_users))
return nfserr_jukebox;
unhash_client_locked(clp);
return nfs_ok;
@@ -1989,6 +2021,7 @@ __destroy_client(struct nfs4_client *clp)
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
free_client(clp);
+ wake_up_all(&expiry_wq);
}
static void
@@ -2199,6 +2232,342 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
return s;
}
+static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+ nc = get_nfsdfs_client(inode);
+ if (!nc)
+ return NULL;
+ return container_of(nc, struct nfs4_client, cl_nfsdfs);
+}
+
+static void seq_quote_mem(struct seq_file *m, char *data, int len)
+{
+ seq_printf(m, "\"");
+ seq_escape_mem_ascii(m, data, len);
+ seq_printf(m, "\"");
+}
+
+static int client_info_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct nfs4_client *clp;
+ u64 clid;
+
+ clp = get_nfsdfs_clp(inode);
+ if (!clp)
+ return -ENXIO;
+ memcpy(&clid, &clp->cl_clientid, sizeof(clid));
+ seq_printf(m, "clientid: 0x%llx\n", clid);
+ seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
+ seq_printf(m, "name: ");
+ seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
+ seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
+ if (clp->cl_nii_domain.data) {
+ seq_printf(m, "Implementation domain: ");
+ seq_quote_mem(m, clp->cl_nii_domain.data,
+ clp->cl_nii_domain.len);
+ seq_printf(m, "\nImplementation name: ");
+ seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
+ seq_printf(m, "\nImplementation time: [%ld, %ld]\n",
+ clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
+ }
+ drop_client(clp);
+
+ return 0;
+}
+
+static int client_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, client_info_show, inode);
+}
+
+static const struct file_operations client_info_fops = {
+ .open = client_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void *states_start(struct seq_file *s, loff_t *pos)
+ __acquires(&clp->cl_lock)
+{
+ struct nfs4_client *clp = s->private;
+ unsigned long id = *pos;
+ void *ret;
+
+ spin_lock(&clp->cl_lock);
+ ret = idr_get_next_ul(&clp->cl_stateids, &id);
+ *pos = id;
+ return ret;
+}
+
+static void *states_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct nfs4_client *clp = s->private;
+ unsigned long id = *pos;
+ void *ret;
+
+ id = *pos;
+ id++;
+ ret = idr_get_next_ul(&clp->cl_stateids, &id);
+ *pos = id;
+ return ret;
+}
+
+static void states_stop(struct seq_file *s, void *v)
+ __releases(&clp->cl_lock)
+{
+ struct nfs4_client *clp = s->private;
+
+ spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_show_superblock(struct seq_file *s, struct file *f)
+{
+ struct inode *inode = file_inode(f);
+
+ seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev),
+ inode->i_ino);
+}
+
+static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
+{
+ seq_printf(s, "owner: ");
+ seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
+}
+
+static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_ol_stateid *ols;
+ struct nfs4_file *nf;
+ struct file *file;
+ struct nfs4_stateowner *oo;
+ unsigned int access, deny;
+
+ if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
+ return 0; /* XXX: or SEQ_SKIP? */
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+ file = find_any_file(nf);
+
+ seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid);
+
+ access = bmap_to_share_mode(ols->st_access_bmap);
+ deny = bmap_to_share_mode(ols->st_deny_bmap);
+
+ seq_printf(s, "access: \%s\%s, ",
+ access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+ access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+ seq_printf(s, "deny: \%s\%s, ",
+ deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+ deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, ", ");
+ nfs4_show_owner(s, oo);
+ seq_printf(s, " }\n");
+ fput(file);
+
+ return 0;
+}
+
+static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_ol_stateid *ols;
+ struct nfs4_file *nf;
+ struct file *file;
+ struct nfs4_stateowner *oo;
+
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+ file = find_any_file(nf);
+
+ seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid);
+
+ /*
+ * Note: a lock stateid isn't really the same thing as a lock,
+ * it's the locking state held by one owner on a file, and there
+ * may be multiple (or no) lock ranges associated with it.
+ * (Same for the matter is true of open stateids.)
+ */
+
+ nfs4_show_superblock(s, file);
+ /* XXX: open stateid? */
+ seq_printf(s, ", ");
+ nfs4_show_owner(s, oo);
+ seq_printf(s, " }\n");
+ fput(file);
+
+ return 0;
+}
+
+static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_delegation *ds;
+ struct nfs4_file *nf;
+ struct file *file;
+
+ ds = delegstateid(st);
+ nf = st->sc_file;
+ file = nf->fi_deleg_file;
+
+ seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid);
+
+ /* Kinda dead code as long as we only support read delegs: */
+ seq_printf(s, "access: %s, ",
+ ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+
+ /* XXX: lease time, whether it's being recalled. */
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, " }\n");
+
+ return 0;
+}
+
+static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_layout_stateid *ls;
+ struct file *file;
+
+ ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
+ file = ls->ls_file;
+
+ seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid);
+
+ /* XXX: What else would be useful? */
+
+ nfs4_show_superblock(s, file);
+ seq_printf(s, " }\n");
+
+ return 0;
+}
+
+static int states_show(struct seq_file *s, void *v)
+{
+ struct nfs4_stid *st = v;
+
+ switch (st->sc_type) {
+ case NFS4_OPEN_STID:
+ return nfs4_show_open(s, st);
+ case NFS4_LOCK_STID:
+ return nfs4_show_lock(s, st);
+ case NFS4_DELEG_STID:
+ return nfs4_show_deleg(s, st);
+ case NFS4_LAYOUT_STID:
+ return nfs4_show_layout(s, st);
+ default:
+ return 0; /* XXX: or SEQ_SKIP? */
+ }
+ /* XXX: copy stateids? */
+}
+
+static struct seq_operations states_seq_ops = {
+ .start = states_start,
+ .next = states_next,
+ .stop = states_stop,
+ .show = states_show
+};
+
+static int client_states_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *s;
+ struct nfs4_client *clp;
+ int ret;
+
+ clp = get_nfsdfs_clp(inode);
+ if (!clp)
+ return -ENXIO;
+
+ ret = seq_open(file, &states_seq_ops);
+ if (ret)
+ return ret;
+ s = file->private_data;
+ s->private = clp;
+ return 0;
+}
+
+static int client_opens_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct nfs4_client *clp = m->private;
+
+ /* XXX: alternatively, we could get/drop in seq start/stop */
+ drop_client(clp);
+ return 0;
+}
+
+static const struct file_operations client_states_fops = {
+ .open = client_states_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = client_opens_release,
+};
+
+/*
+ * Normally we refuse to destroy clients that are in use, but here the
+ * administrator is telling us to just do it. We also want to wait
+ * so the caller has a guarantee that the client's locks are gone by
+ * the time the write returns:
+ */
+static void force_expire_client(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ bool already_expired;
+
+ spin_lock(&clp->cl_lock);
+ clp->cl_time = 0;
+ spin_unlock(&clp->cl_lock);
+
+ wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
+ spin_lock(&nn->client_lock);
+ already_expired = list_empty(&clp->cl_lru);
+ if (!already_expired)
+ unhash_client_locked(clp);
+ spin_unlock(&nn->client_lock);
+
+ if (!already_expired)
+ expire_client(clp);
+ else
+ wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL);
+}
+
+static ssize_t client_ctl_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ char *data;
+ struct nfs4_client *clp;
+
+ data = simple_transaction_get(file, buf, size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ if (size != 7 || 0 != memcmp(data, "expire\n", 7))
+ return -EINVAL;
+ clp = get_nfsdfs_clp(file_inode(file));
+ if (!clp)
+ return -ENXIO;
+ force_expire_client(clp);
+ drop_client(clp);
+ return 7;
+}
+
+static const struct file_operations client_ctl_fops = {
+ .write = client_ctl_write,
+ .release = simple_transaction_release,
+};
+
+static const struct tree_descr client_files[] = {
+ [0] = {"info", &client_info_fops, S_IRUSR},
+ [1] = {"states", &client_states_fops, S_IRUSR},
+ [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR},
+ [3] = {""},
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2206,6 +2575,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
struct sockaddr *sa = svc_addr(rqstp);
int ret;
struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
clp = alloc_client(name);
if (clp == NULL)
@@ -2216,13 +2586,22 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ gen_clid(clp, nn);
+ kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
- rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
+ memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
clp->net = net;
+ clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ client_files);
+ if (!clp->cl_nfsd_dentry) {
+ free_client(clp);
+ return NULL;
+ }
return clp;
}
@@ -2533,6 +2912,22 @@ static bool client_has_state(struct nfs4_client *clp)
|| !list_empty(&clp->async_copies);
}
+static __be32 copy_impl_id(struct nfs4_client *clp,
+ struct nfsd4_exchange_id *exid)
+{
+ if (!exid->nii_domain.data)
+ return 0;
+ xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL);
+ if (!clp->cl_nii_domain.data)
+ return nfserr_jukebox;
+ xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
+ if (!clp->cl_nii_name.data)
+ return nfserr_jukebox;
+ clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec;
+ clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec;
+ return 0;
+}
+
__be32
nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -2559,6 +2954,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
new = create_client(exid->clname, rqstp, &verf);
if (new == NULL)
return nfserr_jukebox;
+ status = copy_impl_id(new, exid);
+ if (status)
+ goto out_nolock;
switch (exid->spa_how) {
case SP4_MACH_CRED:
@@ -2667,7 +3065,6 @@ out_new:
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
- gen_clid(new, nn);
add_to_unconfirmed(new);
swap(new, conf);
out_copy:
@@ -3411,7 +3808,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
copy_clid(new, conf);
gen_confirm(new, nn);
} else /* case 4 (new client) or cases 2, 3 (client reboot): */
- gen_clid(new, nn);
+ ;
new->cl_minorversion = 0;
gen_callback(new, setclid, rqstp);
add_to_unconfirmed(new);
@@ -3632,12 +4029,11 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
if (!sop)
return NULL;
- sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
+ xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL);
if (!sop->so_owner.data) {
kmem_cache_free(slab, sop);
return NULL;
}
- sop->so_owner.len = owner->len;
INIT_LIST_HEAD(&sop->so_stateids);
sop->so_client = clp;
@@ -4092,7 +4488,7 @@ static __be32 lookup_clientid(clientid_t *clid,
spin_unlock(&nn->client_lock);
return nfserr_expired;
}
- atomic_inc(&found->cl_refcount);
+ atomic_inc(&found->cl_rpc_users);
spin_unlock(&nn->client_lock);
/* Cache the nfs4_client in cstate! */
@@ -5725,12 +6121,11 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
if (fl->fl_lmops == &nfsd_posix_mng_ops) {
lo = (struct nfs4_lockowner *) fl->fl_owner;
- deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
- lo->lo_owner.so_owner.len, GFP_KERNEL);
+ xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner,
+ GFP_KERNEL);
if (!deny->ld_owner.data)
/* We just don't care that much */
goto nevermind;
- deny->ld_owner.len = lo->lo_owner.so_owner.len;
deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
} else {
nevermind:
@@ -6584,7 +6979,7 @@ nfs4_check_open_reclaim(clientid_t *clid,
static inline void
put_client(struct nfs4_client *clp)
{
- atomic_dec(&clp->cl_refcount);
+ atomic_dec(&clp->cl_rpc_users);
}
static struct nfs4_client *
@@ -6702,7 +7097,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
return;
lockdep_assert_held(&nn->client_lock);
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
list_add(&lst->st_locks, collect);
}
@@ -6731,7 +7126,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
* Despite the fact that these functions deal
* with 64-bit integers for "count", we must
* ensure that it doesn't blow up the
- * clp->cl_refcount. Throw a warning if we
+ * clp->cl_rpc_users. Throw a warning if we
* start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -6855,7 +7250,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
if (func) {
func(oop);
if (collect) {
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
list_add(&oop->oo_perclient, collect);
}
}
@@ -6863,7 +7258,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
+ * it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
@@ -6993,7 +7388,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
if (dp->dl_time != 0)
continue;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, victims);
}
@@ -7001,7 +7396,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
/*
* Despite the fact that these functions deal with
* 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
+ * it doesn't blow up the clp->cl_rpc_users. Throw a
* warning if we start to approach INT_MAX here.
*/
WARN_ON_ONCE(count == (INT_MAX / 2));
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 52c4f6daa649..442811809f3d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -269,19 +269,13 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
return ret;
}
-/*
- * We require the high 32 bits of 'seconds' to be 0, and
- * we ignore all 32 bits of 'nseconds'.
- */
static __be32
-nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv)
+nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
{
DECODE_HEAD;
- u64 sec;
READ_BUF(12);
- p = xdr_decode_hyper(p, &sec);
- tv->tv_sec = sec;
+ p = xdr_decode_hyper(p, &tv->tv_sec);
tv->tv_nsec = be32_to_cpup(p++);
if (tv->tv_nsec >= (u32)1000000000)
return nfserr_inval;
@@ -320,7 +314,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
struct iattr *iattr, struct nfs4_acl **acl,
struct xdr_netobj *label, int *umask)
{
- struct timespec ts;
int expected_len, len = 0;
u32 dummy32;
char *buf;
@@ -422,8 +415,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_atime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_time(argp, &iattr->ia_atime);
if (status)
return status;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -442,8 +434,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_mtime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_time(argp, &iattr->ia_mtime);
if (status)
return status;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -1398,7 +1389,6 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
goto xdr_error;
}
- /* Ignore Implementation ID */
READ_BUF(4); /* nfs_impl_id4 array length */
dummy = be32_to_cpup(p++);
@@ -1406,21 +1396,19 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
goto xdr_error;
if (dummy == 1) {
- /* nii_domain */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ status = nfsd4_decode_opaque(argp, &exid->nii_domain);
+ if (status)
+ goto xdr_error;
/* nii_name */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
+ status = nfsd4_decode_opaque(argp, &exid->nii_name);
+ if (status)
+ goto xdr_error;
/* nii_date */
- READ_BUF(12);
- p += 3;
+ status = nfsd4_decode_time(argp, &exid->nii_time);
+ if (status)
+ goto xdr_error;
}
DECODE_TAIL;
}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index da52b594362a..26ad75ae2be0 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -9,6 +9,7 @@
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sunrpc/addr.h>
@@ -35,48 +36,12 @@ struct nfsd_drc_bucket {
spinlock_t cache_lock;
};
-static struct nfsd_drc_bucket *drc_hashtbl;
-static struct kmem_cache *drc_slab;
-
-/* max number of entries allowed in the cache */
-static unsigned int max_drc_entries;
-
-/* number of significant bits in the hash value */
-static unsigned int maskbits;
-static unsigned int drc_hashsize;
-
-/*
- * Stats and other tracking of on the duplicate reply cache. All of these and
- * the "rc" fields in nfsdstats are protected by the cache_lock
- */
-
-/* total number of entries */
-static atomic_t num_drc_entries;
-
-/* cache misses due only to checksum comparison failures */
-static unsigned int payload_misses;
-
-/* amount of memory (in bytes) currently consumed by the DRC */
-static unsigned int drc_mem_usage;
-
-/* longest hash chain seen */
-static unsigned int longest_chain;
-
-/* size of cache when we saw the longest hash chain */
-static unsigned int longest_chain_cachesize;
-
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
-static struct shrinker nfsd_reply_cache_shrinker = {
- .scan_objects = nfsd_reply_cache_scan,
- .count_objects = nfsd_reply_cache_count,
- .seeks = 1,
-};
-
/*
* Put a cap on the size of the DRC based on the amount of available
* low memory in the machine.
@@ -94,6 +59,9 @@ static struct shrinker nfsd_reply_cache_shrinker = {
* ...with a hard cap of 256k entries. In the worst case, each entry will be
* ~1k, so the above numbers should give a rough max of the amount of memory
* used in k.
+ *
+ * XXX: these limits are per-container, so memory used will increase
+ * linearly with number of containers. Maybe that's OK.
*/
static unsigned int
nfsd_cache_size_limit(void)
@@ -116,17 +84,18 @@ nfsd_hashsize(unsigned int limit)
}
static u32
-nfsd_cache_hash(__be32 xid)
+nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
{
- return hash_32(be32_to_cpu(xid), maskbits);
+ return hash_32(be32_to_cpu(xid), nn->maskbits);
}
static struct svc_cacherep *
-nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
+nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
{
struct svc_cacherep *rp;
- rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
+ rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL);
if (rp) {
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
@@ -147,91 +116,101 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
}
static void
-nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+ struct nfsd_net *nn)
{
if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
- drc_mem_usage -= rp->c_replvec.iov_len;
+ nn->drc_mem_usage -= rp->c_replvec.iov_len;
kfree(rp->c_replvec.iov_base);
}
if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru);
- atomic_dec(&num_drc_entries);
- drc_mem_usage -= sizeof(*rp);
+ atomic_dec(&nn->num_drc_entries);
+ nn->drc_mem_usage -= sizeof(*rp);
}
- kmem_cache_free(drc_slab, rp);
+ kmem_cache_free(nn->drc_slab, rp);
}
static void
-nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+ struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
spin_unlock(&b->cache_lock);
}
-int nfsd_reply_cache_init(void)
+int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
unsigned int i;
int status = 0;
- max_drc_entries = nfsd_cache_size_limit();
- atomic_set(&num_drc_entries, 0);
- hashsize = nfsd_hashsize(max_drc_entries);
- maskbits = ilog2(hashsize);
+ nn->max_drc_entries = nfsd_cache_size_limit();
+ atomic_set(&nn->num_drc_entries, 0);
+ hashsize = nfsd_hashsize(nn->max_drc_entries);
+ nn->maskbits = ilog2(hashsize);
- status = register_shrinker(&nfsd_reply_cache_shrinker);
+ nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
+ nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
+ nn->nfsd_reply_cache_shrinker.seeks = 1;
+ status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
if (status)
- return status;
-
- drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
- 0, 0, NULL);
- if (!drc_slab)
goto out_nomem;
- drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
- if (!drc_hashtbl) {
- drc_hashtbl = vzalloc(array_size(hashsize,
- sizeof(*drc_hashtbl)));
- if (!drc_hashtbl)
- goto out_nomem;
+ nn->drc_slab = kmem_cache_create("nfsd_drc",
+ sizeof(struct svc_cacherep), 0, 0, NULL);
+ if (!nn->drc_slab)
+ goto out_shrinker;
+
+ nn->drc_hashtbl = kcalloc(hashsize,
+ sizeof(*nn->drc_hashtbl), GFP_KERNEL);
+ if (!nn->drc_hashtbl) {
+ nn->drc_hashtbl = vzalloc(array_size(hashsize,
+ sizeof(*nn->drc_hashtbl)));
+ if (!nn->drc_hashtbl)
+ goto out_slab;
}
for (i = 0; i < hashsize; i++) {
- INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
- spin_lock_init(&drc_hashtbl[i].cache_lock);
+ INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head);
+ spin_lock_init(&nn->drc_hashtbl[i].cache_lock);
}
- drc_hashsize = hashsize;
+ nn->drc_hashsize = hashsize;
return 0;
+out_slab:
+ kmem_cache_destroy(nn->drc_slab);
+out_shrinker:
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
- nfsd_reply_cache_shutdown();
return -ENOMEM;
}
-void nfsd_reply_cache_shutdown(void)
+void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
struct svc_cacherep *rp;
unsigned int i;
- unregister_shrinker(&nfsd_reply_cache_shrinker);
+ unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
- for (i = 0; i < drc_hashsize; i++) {
- struct list_head *head = &drc_hashtbl[i].lru_head;
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
rp = list_first_entry(head, struct svc_cacherep, c_lru);
- nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp);
+ nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
+ rp, nn);
}
}
- kvfree(drc_hashtbl);
- drc_hashtbl = NULL;
- drc_hashsize = 0;
+ kvfree(nn->drc_hashtbl);
+ nn->drc_hashtbl = NULL;
+ nn->drc_hashsize = 0;
- kmem_cache_destroy(drc_slab);
- drc_slab = NULL;
+ kmem_cache_destroy(nn->drc_slab);
+ nn->drc_slab = NULL;
}
/*
@@ -246,7 +225,7 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
}
static long
-prune_bucket(struct nfsd_drc_bucket *b)
+prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
{
struct svc_cacherep *rp, *tmp;
long freed = 0;
@@ -258,10 +237,10 @@ prune_bucket(struct nfsd_drc_bucket *b)
*/
if (rp->c_state == RC_INPROG)
continue;
- if (atomic_read(&num_drc_entries) <= max_drc_entries &&
+ if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
freed++;
}
return freed;
@@ -272,18 +251,18 @@ prune_bucket(struct nfsd_drc_bucket *b)
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long
-prune_cache_entries(void)
+prune_cache_entries(struct nfsd_net *nn)
{
unsigned int i;
long freed = 0;
- for (i = 0; i < drc_hashsize; i++) {
- struct nfsd_drc_bucket *b = &drc_hashtbl[i];
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
spin_lock(&b->cache_lock);
- freed += prune_bucket(b);
+ freed += prune_bucket(b, nn);
spin_unlock(&b->cache_lock);
}
return freed;
@@ -292,13 +271,19 @@ prune_cache_entries(void)
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return atomic_read(&num_drc_entries);
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return atomic_read(&nn->num_drc_entries);
}
static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- return prune_cache_entries();
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return prune_cache_entries(nn);
}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
@@ -334,11 +319,12 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
}
static int
-nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp)
+nfsd_cache_key_cmp(const struct svc_cacherep *key,
+ const struct svc_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum)
- ++payload_misses;
+ ++nn->payload_misses;
return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key));
}
@@ -349,7 +335,8 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp
* inserts an empty key on failure.
*/
static struct svc_cacherep *
-nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
+nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
+ struct nfsd_net *nn)
{
struct svc_cacherep *rp, *ret = key;
struct rb_node **p = &b->rb_head.rb_node,
@@ -362,7 +349,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
parent = *p;
rp = rb_entry(parent, struct svc_cacherep, c_node);
- cmp = nfsd_cache_key_cmp(key, rp);
+ cmp = nfsd_cache_key_cmp(key, rp, nn);
if (cmp < 0)
p = &parent->rb_left;
else if (cmp > 0)
@@ -376,14 +363,14 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
rb_insert_color(&key->c_node, &b->rb_head);
out:
/* tally hash chain length stats */
- if (entries > longest_chain) {
- longest_chain = entries;
- longest_chain_cachesize = atomic_read(&num_drc_entries);
- } else if (entries == longest_chain) {
+ if (entries > nn->longest_chain) {
+ nn->longest_chain = entries;
+ nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries);
+ } else if (entries == nn->longest_chain) {
/* prefer to keep the smallest cachesize possible here */
- longest_chain_cachesize = min_t(unsigned int,
- longest_chain_cachesize,
- atomic_read(&num_drc_entries));
+ nn->longest_chain_cachesize = min_t(unsigned int,
+ nn->longest_chain_cachesize,
+ atomic_read(&nn->num_drc_entries));
}
lru_put_end(b, ret);
@@ -400,11 +387,12 @@ out:
int
nfsd_cache_lookup(struct svc_rqst *rqstp)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp, *found;
__be32 xid = rqstp->rq_xid;
__wsum csum;
- u32 hash = nfsd_cache_hash(xid);
- struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
+ u32 hash = nfsd_cache_hash(xid, nn);
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
int type = rqstp->rq_cachetype;
int rtn = RC_DOIT;
@@ -420,16 +408,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
- rp = nfsd_reply_cache_alloc(rqstp, csum);
+ rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
if (!rp) {
dprintk("nfsd: unable to allocate DRC entry!\n");
return rtn;
}
spin_lock(&b->cache_lock);
- found = nfsd_cache_insert(b, rp);
+ found = nfsd_cache_insert(b, rp, nn);
if (found != rp) {
- nfsd_reply_cache_free_locked(NULL, rp);
+ nfsd_reply_cache_free_locked(NULL, rp, nn);
rp = found;
goto found_entry;
}
@@ -438,11 +426,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rqstp->rq_cacherep = rp;
rp->c_state = RC_INPROG;
- atomic_inc(&num_drc_entries);
- drc_mem_usage += sizeof(*rp);
+ atomic_inc(&nn->num_drc_entries);
+ nn->drc_mem_usage += sizeof(*rp);
/* go ahead and prune the cache */
- prune_bucket(b);
+ prune_bucket(b, nn);
out:
spin_unlock(&b->cache_lock);
return rtn;
@@ -477,7 +465,7 @@ found_entry:
break;
default:
printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_reply_cache_free_locked(b, rp, nn);
}
goto out;
@@ -502,6 +490,7 @@ found_entry:
void
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
u32 hash;
@@ -512,15 +501,15 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
if (!rp)
return;
- hash = nfsd_cache_hash(rp->c_key.k_xid);
- b = &drc_hashtbl[hash];
+ hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
+ b = &nn->drc_hashtbl[hash];
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
@@ -535,18 +524,18 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
bufsize = len << 2;
cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
if (!cachv->iov_base) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
cachv->iov_len = bufsize;
memcpy(cachv->iov_base, statp, bufsize);
break;
case RC_NOCACHE:
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
spin_lock(&b->cache_lock);
- drc_mem_usage += bufsize;
+ nn->drc_mem_usage += bufsize;
lru_put_end(b, rp);
rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype;
@@ -582,21 +571,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
*/
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "max entries: %u\n", max_drc_entries);
+ struct nfsd_net *nn = v;
+
+ seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
- atomic_read(&num_drc_entries));
- seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
- seq_printf(m, "mem usage: %u\n", drc_mem_usage);
+ atomic_read(&nn->num_drc_entries));
+ seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
+ seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage);
seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses);
seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache);
- seq_printf(m, "payload misses: %u\n", payload_misses);
- seq_printf(m, "longest chain len: %u\n", longest_chain);
- seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
+ seq_printf(m, "payload misses: %u\n", nn->payload_misses);
+ seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
+ seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
}
int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
{
- return single_open(file, nfsd_reply_cache_stats_show, NULL);
+ struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info,
+ nfsd_net_id);
+
+ return single_open(file, nfsd_reply_cache_stats_show, nn);
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 62c58cfeb8d8..0a9a49ded546 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -16,6 +16,7 @@
#include <linux/sunrpc/gss_krb5_enctypes.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/module.h>
+#include <linux/fsnotify.h>
#include "idmap.h"
#include "nfsd.h"
@@ -53,6 +54,7 @@ enum {
NFSD_RecoveryDir,
NFSD_V4EndGrace,
#endif
+ NFSD_MaxReserved
};
/*
@@ -1147,8 +1149,201 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
* populating the filesystem.
*/
+/* Basically copying rpc_get_inode. */
+static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
+{
+ struct inode *inode = new_inode(sb);
+ if (!inode)
+ return NULL;
+ /* Following advice from simple_fill_super documentation: */
+ inode->i_ino = iunique(sb, NFSD_MaxReserved);
+ inode->i_mode = mode;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ switch (mode & S_IFMT) {
+ case S_IFDIR:
+ inode->i_fop = &simple_dir_operations;
+ inode->i_op = &simple_dir_inode_operations;
+ inc_nlink(inode);
+ default:
+ break;
+ }
+ return inode;
+}
+
+static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode;
+
+ inode = nfsd_get_inode(dir->i_sb, mode);
+ if (!inode)
+ return -ENOMEM;
+ d_add(dentry, inode);
+ inc_nlink(dir);
+ fsnotify_mkdir(dir, dentry);
+ return 0;
+}
+
+static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name)
+{
+ struct inode *dir = parent->d_inode;
+ struct dentry *dentry;
+ int ret = -ENOMEM;
+
+ inode_lock(dir);
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ goto out_err;
+ ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600);
+ if (ret)
+ goto out_err;
+ if (ncl) {
+ d_inode(dentry)->i_private = ncl;
+ kref_get(&ncl->cl_ref);
+ }
+out:
+ inode_unlock(dir);
+ return dentry;
+out_err:
+ dentry = ERR_PTR(ret);
+ goto out;
+}
+
+static void clear_ncl(struct inode *inode)
+{
+ struct nfsdfs_client *ncl = inode->i_private;
+
+ inode->i_private = NULL;
+ synchronize_rcu();
+ kref_put(&ncl->cl_ref, ncl->cl_release);
+}
+
+
+static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc = inode->i_private;
+
+ if (nc)
+ kref_get(&nc->cl_ref);
+ return nc;
+}
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+
+ rcu_read_lock();
+ nc = __get_nfsdfs_client(inode);
+ rcu_read_unlock();
+ return nc;
+}
+/* from __rpc_unlink */
+static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
+{
+ int ret;
+
+ clear_ncl(d_inode(dentry));
+ dget(dentry);
+ ret = simple_unlink(dir, dentry);
+ d_delete(dentry);
+ dput(dentry);
+ WARN_ON_ONCE(ret);
+}
+
+static void nfsdfs_remove_files(struct dentry *root)
+{
+ struct dentry *dentry, *tmp;
+
+ list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) {
+ if (!simple_positive(dentry)) {
+ WARN_ON_ONCE(1); /* I think this can't happen? */
+ continue;
+ }
+ nfsdfs_remove_file(d_inode(root), dentry);
+ }
+}
+
+/* XXX: cut'n'paste from simple_fill_super; figure out if we could share
+ * code instead. */
+static int nfsdfs_create_files(struct dentry *root,
+ const struct tree_descr *files)
+{
+ struct inode *dir = d_inode(root);
+ struct inode *inode;
+ struct dentry *dentry;
+ int i;
+
+ inode_lock(dir);
+ for (i = 0; files->name && files->name[0]; i++, files++) {
+ if (!files->name)
+ continue;
+ dentry = d_alloc_name(root, files->name);
+ if (!dentry)
+ goto out;
+ inode = nfsd_get_inode(d_inode(root)->i_sb,
+ S_IFREG | files->mode);
+ if (!inode) {
+ dput(dentry);
+ goto out;
+ }
+ inode->i_fop = files->ops;
+ inode->i_private = __get_nfsdfs_client(dir);
+ d_add(dentry, inode);
+ fsnotify_create(dir, dentry);
+ }
+ inode_unlock(dir);
+ return 0;
+out:
+ nfsdfs_remove_files(root);
+ inode_unlock(dir);
+ return -ENOMEM;
+}
+
+/* on success, returns positive number unique to that client. */
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id,
+ const struct tree_descr *files)
+{
+ struct dentry *dentry;
+ char name[11];
+ int ret;
+
+ sprintf(name, "%u", id);
+
+ dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
+ if (IS_ERR(dentry)) /* XXX: tossing errors? */
+ return NULL;
+ ret = nfsdfs_create_files(dentry, files);
+ if (ret) {
+ nfsd_client_rmdir(dentry);
+ return NULL;
+ }
+ return dentry;
+}
+
+/* Taken from __rpc_rmdir: */
+void nfsd_client_rmdir(struct dentry *dentry)
+{
+ struct inode *dir = d_inode(dentry->d_parent);
+ struct inode *inode = d_inode(dentry);
+ int ret;
+
+ inode_lock(dir);
+ nfsdfs_remove_files(dentry);
+ clear_ncl(inode);
+ dget(dentry);
+ ret = simple_rmdir(dir, dentry);
+ WARN_ON_ONCE(ret);
+ d_delete(dentry);
+ inode_unlock(dir);
+}
+
static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
{
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ struct dentry *dentry;
+ int ret;
+
static const struct tree_descr nfsd_files[] = {
[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
@@ -1178,7 +1373,15 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
/* last one */ {""}
};
get_net(sb->s_fs_info);
- return simple_fill_super(sb, 0x6e667364, nfsd_files);
+ ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
+ if (ret)
+ return ret;
+ dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ nn->nfsd_client_dir = dentry;
+ return 0;
+
}
static struct dentry *nfsd_mount(struct file_system_type *fs_type,
@@ -1232,6 +1435,7 @@ unsigned int nfsd_net_id;
static __net_init int nfsd_init_net(struct net *net)
{
int retval;
+ struct vfsmount *mnt;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
retval = nfsd_export_init(net);
@@ -1242,18 +1446,33 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_drc_error;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
nn->somebody_reclaimed = false;
nn->track_reclaim_completes = false;
nn->clverifier_counter = prandom_u32();
- nn->clientid_counter = prandom_u32();
+ nn->clientid_base = prandom_u32();
+ nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
atomic_set(&nn->ntf_refcnt, 0);
init_waitqueue_head(&nn->ntf_wq);
+
+ mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+ if (IS_ERR(mnt)) {
+ retval = PTR_ERR(mnt);
+ goto out_mount_err;
+ }
+ nn->nfsd_mnt = mnt;
return 0;
+out_mount_err:
+ nfsd_reply_cache_shutdown(nn);
+out_drc_error:
+ nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
@@ -1262,6 +1481,10 @@ out_export_error:
static __net_exit void nfsd_exit_net(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ mntput(nn->nfsd_mnt);
+ nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
@@ -1291,13 +1514,8 @@ static int __init init_nfsd(void)
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
- retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
- if (retval)
- goto out_exit_pnfs;
+ nfsd_fault_inject_init(); /* nfsd fault injection controls */
nfsd_stat_init(); /* Statistics */
- retval = nfsd_reply_cache_init();
- if (retval)
- goto out_free_stat;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
@@ -1311,11 +1529,8 @@ out_free_all:
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
- nfsd_reply_cache_shutdown();
-out_free_stat:
nfsd_stat_shutdown();
nfsd_fault_inject_cleanup();
-out_exit_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
nfsd4_free_slabs();
@@ -1328,7 +1543,6 @@ out_unregister_pernet:
static void __exit exit_nfsd(void)
{
- nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 24187b5dd638..af2947551e9c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -22,6 +22,7 @@
#include <uapi/linux/nfsd/debug.h>
+#include "netns.h"
#include "stats.h"
#include "export.h"
@@ -86,6 +87,16 @@ int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_destroy(struct net *net);
+struct nfsdfs_client {
+ struct kref cl_ref;
+ void (*cl_release)(struct kref *kref);
+};
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *);
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
+void nfsd_client_rmdir(struct dentry *dentry);
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
extern const struct svc_version nfsd_acl_version2;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 0b74d371ed67..5dbd16946e8e 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -39,6 +39,7 @@
#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
#include "nfsfh.h"
+#include "nfsd.h"
typedef struct {
u32 cl_boot;
@@ -316,6 +317,10 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
+ /* NFSv4.1 client implementation id: */
+ struct xdr_netobj cl_nii_domain;
+ struct xdr_netobj cl_nii_name;
+ struct timespec cl_nii_time;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
@@ -347,9 +352,13 @@ struct nfs4_client {
struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */
- atomic_t cl_refcount;
+ atomic_t cl_rpc_users;
+ struct nfsdfs_client cl_nfsdfs;
struct nfs4_op_map cl_spo_must_allow;
+ /* debugging info directory under nfsd/clients/ : */
+ struct dentry *cl_nfsd_dentry;
+
/* for nfs41 callbacks */
/* We currently support a single back channel with a single slot */
unsigned long cl_cb_slot_busy;
@@ -663,7 +672,7 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn);
/* nfs fault injection functions */
#ifdef CONFIG_NFSD_FAULT_INJECTION
-int nfsd_fault_inject_init(void);
+void nfsd_fault_inject_init(void);
void nfsd_fault_inject_cleanup(void);
u64 nfsd_inject_print_clients(void);
@@ -684,7 +693,7 @@ u64 nfsd_inject_forget_delegations(u64);
u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t);
u64 nfsd_inject_recall_delegations(u64);
#else /* CONFIG_NFSD_FAULT_INJECTION */
-static inline int nfsd_fault_inject_init(void) { return 0; }
+static inline void nfsd_fault_inject_init(void) {}
static inline void nfsd_fault_inject_cleanup(void) {}
#endif /* CONFIG_NFSD_FAULT_INJECTION */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fc24ee47eab5..c85783e536d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -404,7 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/*
* If utimes(2) and friends are called with times not NULL, we should
* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
- * will return EACCESS, when the caller's effective UID does not match
+ * will return EACCES, when the caller's effective UID does not match
* the owner of the file, and the caller is not privileged. In this
* situation, we should return EPERM(notify_change will return this).
*/
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index feeb6d4bdffd..d64c870f998a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -410,6 +410,9 @@ struct nfsd4_exchange_id {
int spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
+ struct xdr_netobj nii_domain;
+ struct xdr_netobj nii_name;
+ struct timespec64 nii_time;
};
struct nfsd4_sequence {
@@ -472,7 +475,7 @@ struct nfsd4_layoutcommit {
u32 lc_reclaim; /* request */
u32 lc_newoffset; /* request */
u64 lc_last_wr; /* request */
- struct timespec lc_mtime; /* request */
+ struct timespec64 lc_mtime; /* request */
u32 lc_layout_type; /* request */
u32 lc_up_len; /* layout length */
void *lc_up_layout; /* decoded by callback */
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 9b96d79eea6c..91b9dac6b2cc 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -148,13 +148,8 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
oldflags = NILFS_I(inode)->i_flags;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by the
- * relevant capability.
- */
- ret = -EPERM;
- if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) &&
- !capable(CAP_LINUX_IMMUTABLE))
+ ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (ret)
goto out;
ret = nilfs_transaction_begin(inode->i_sb, &ti, 0);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index b428c295d13f..5778d1347b35 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -288,10 +288,13 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
/*
* For queues with unlimited length lost events are not expected and
* can possibly have security implications. Avoid losing events when
- * memory is short.
+ * memory is short. For the limited size queues, avoid OOM killer in the
+ * target monitoring memcg as it may have security repercussion.
*/
if (group->max_events == UINT_MAX)
gfp |= __GFP_NOFAIL;
+ else
+ gfp |= __GFP_RETRY_MAYFAIL;
/* Whoever is interested in the event, pays for the allocation. */
memalloc_use_memcg(group->memcg);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a90bb19dcfa2..91006f47e420 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -920,6 +920,22 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
return 0;
}
+static int fanotify_events_supported(struct path *path, __u64 mask)
+{
+ /*
+ * Some filesystems such as 'proc' acquire unusual locks when opening
+ * files. For them fanotify permission events have high chances of
+ * deadlocking the system - open done when reporting fanotify event
+ * blocks on this "unusual" lock while another process holding the lock
+ * waits for fanotify permission event to be answered. Just disallow
+ * permission events for such filesystems.
+ */
+ if (mask & FANOTIFY_PERM_EVENTS &&
+ path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
+ return -EINVAL;
+ return 0;
+}
+
static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
@@ -1018,6 +1034,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (ret)
goto fput_and_out;
+ if (flags & FAN_MARK_ADD) {
+ ret = fanotify_events_supported(&path, mask);
+ if (ret)
+ goto path_put_and_out;
+ }
+
if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
ret = fanotify_test_fid(&path, &__fsid);
if (ret)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4eb2ebfac468..2ecef6155fc0 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -95,47 +95,6 @@ void fsnotify_sb_delete(struct super_block *sb)
}
/*
- * fsnotify_nameremove - a filename was removed from a directory
- *
- * This is mostly called under parent vfs inode lock so name and
- * dentry->d_parent should be stable. However there are some corner cases where
- * inode lock is not held. So to be on the safe side and be reselient to future
- * callers and out of tree users of d_delete(), we do not assume that d_parent
- * and d_name are stable and we use dget_parent() and
- * take_dentry_name_snapshot() to grab stable references.
- */
-void fsnotify_nameremove(struct dentry *dentry, int isdir)
-{
- struct dentry *parent;
- struct name_snapshot name;
- __u32 mask = FS_DELETE;
-
- /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */
- if (IS_ROOT(dentry))
- return;
-
- if (isdir)
- mask |= FS_ISDIR;
-
- parent = dget_parent(dentry);
- /* Avoid unneeded take_dentry_name_snapshot() */
- if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) &&
- !(dentry->d_sb->s_fsnotify_mask & FS_DELETE))
- goto out_dput;
-
- take_dentry_name_snapshot(&name, dentry);
-
- fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
- &name.name, 0);
-
- release_dentry_name_snapshot(&name);
-
-out_dput:
- dput(parent);
-}
-EXPORT_SYMBOL(fsnotify_nameremove);
-
-/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 2fda08b2b885..d510223d302c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -90,9 +90,13 @@ int inotify_handle_event(struct fsnotify_group *group,
i_mark = container_of(inode_mark, struct inotify_inode_mark,
fsn_mark);
- /* Whoever is interested in the event, pays for the allocation. */
+ /*
+ * Whoever is interested in the event, pays for the allocation. Do not
+ * trigger OOM killer in the target monitoring memcg as it may have
+ * security repercussion.
+ */
memalloc_use_memcg(group->memcg);
- event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
+ event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
memalloc_unuse_memcg();
if (unlikely(!event)) {
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d1348fc4ca6d..0c335b51043d 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6191,17 +6191,17 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
if (le16_to_cpu(tl->tl_used)) {
trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
- *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
+ /*
+ * Assuming the write-out below goes well, this copy will be
+ * passed back to recovery for processing.
+ */
+ *tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL);
if (!(*tl_copy)) {
status = -ENOMEM;
mlog_errno(status);
goto bail;
}
- /* Assuming the write-out below goes well, this copy
- * will be passed back to recovery for processing. */
- memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
-
/* All we need to do to clear the truncate log is set
* tl_used. */
tl->tl_used = 0;
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index 005b813a56b6..429e6a8359a5 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -242,57 +242,29 @@ static struct dentry *blockcheck_debugfs_create(const char *name,
static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
{
if (stats) {
- debugfs_remove(stats->b_debug_check);
- stats->b_debug_check = NULL;
- debugfs_remove(stats->b_debug_failure);
- stats->b_debug_failure = NULL;
- debugfs_remove(stats->b_debug_recover);
- stats->b_debug_recover = NULL;
- debugfs_remove(stats->b_debug_dir);
+ debugfs_remove_recursive(stats->b_debug_dir);
stats->b_debug_dir = NULL;
}
}
-static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
- struct dentry *parent)
+static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
+ struct dentry *parent)
{
- int rc = -EINVAL;
-
- if (!stats)
- goto out;
-
stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
- if (!stats->b_debug_dir)
- goto out;
- stats->b_debug_check =
- blockcheck_debugfs_create("blocks_checked",
- stats->b_debug_dir,
- &stats->b_check_count);
+ blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir,
+ &stats->b_check_count);
- stats->b_debug_failure =
- blockcheck_debugfs_create("checksums_failed",
- stats->b_debug_dir,
- &stats->b_failure_count);
+ blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir,
+ &stats->b_failure_count);
- stats->b_debug_recover =
- blockcheck_debugfs_create("ecc_recoveries",
- stats->b_debug_dir,
- &stats->b_recover_count);
- if (stats->b_debug_check && stats->b_debug_failure &&
- stats->b_debug_recover)
- rc = 0;
-
-out:
- if (rc)
- ocfs2_blockcheck_debug_remove(stats);
- return rc;
+ blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir,
+ &stats->b_recover_count);
}
#else
-static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
- struct dentry *parent)
+static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
+ struct dentry *parent)
{
- return 0;
}
static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
@@ -301,10 +273,10 @@ static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *
#endif /* CONFIG_DEBUG_FS */
/* Always-called wrappers for starting and stopping the debugfs files */
-int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
- struct dentry *parent)
+void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
+ struct dentry *parent)
{
- return ocfs2_blockcheck_debug_install(stats, parent);
+ ocfs2_blockcheck_debug_install(stats, parent);
}
void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats)
diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h
index f2d2689407fa..8f17d2c85f40 100644
--- a/fs/ocfs2/blockcheck.h
+++ b/fs/ocfs2/blockcheck.h
@@ -25,9 +25,6 @@ struct ocfs2_blockcheck_stats {
* ocfs2_blockcheck_stats_debugfs_install()
*/
struct dentry *b_debug_dir; /* Parent of the debugfs files */
- struct dentry *b_debug_check; /* Exposes b_check_count */
- struct dentry *b_debug_failure; /* Exposes b_failure_count */
- struct dentry *b_debug_recover; /* Exposes b_recover_count */
};
@@ -56,8 +53,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_blockcheck_stats *stats);
/* Debug Initialization */
-int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
- struct dentry *parent);
+void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
+ struct dentry *parent);
void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats);
/*
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 7a3a096856a8..f1b613327ac8 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -92,10 +92,6 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
#define O2HB_DEBUG_REGION_PINNED "pinned"
static struct dentry *o2hb_debug_dir;
-static struct dentry *o2hb_debug_livenodes;
-static struct dentry *o2hb_debug_liveregions;
-static struct dentry *o2hb_debug_quorumregions;
-static struct dentry *o2hb_debug_failedregions;
static LIST_HEAD(o2hb_all_regions);
@@ -1184,7 +1180,7 @@ bail:
if (atomic_read(&reg->hr_steady_iterations) != 0) {
if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) {
printk(KERN_NOTICE "o2hb: Unable to stabilize "
- "heartbeart on region %s (%s)\n",
+ "heartbeat on region %s (%s)\n",
config_item_name(&reg->hr_item),
reg->hr_dev_name);
atomic_set(&reg->hr_steady_iterations, 0);
@@ -1391,11 +1387,7 @@ static const struct file_operations o2hb_debug_fops = {
void o2hb_exit(void)
{
- debugfs_remove(o2hb_debug_failedregions);
- debugfs_remove(o2hb_debug_quorumregions);
- debugfs_remove(o2hb_debug_liveregions);
- debugfs_remove(o2hb_debug_livenodes);
- debugfs_remove(o2hb_debug_dir);
+ debugfs_remove_recursive(o2hb_debug_dir);
kfree(o2hb_db_livenodes);
kfree(o2hb_db_liveregions);
kfree(o2hb_db_quorumregions);
@@ -1419,79 +1411,37 @@ static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
&o2hb_debug_fops);
}
-static int o2hb_debug_init(void)
+static void o2hb_debug_init(void)
{
- int ret = -ENOMEM;
-
o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
- if (!o2hb_debug_dir) {
- mlog_errno(ret);
- goto bail;
- }
- o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES,
- o2hb_debug_dir,
- &o2hb_db_livenodes,
- sizeof(*o2hb_db_livenodes),
- O2HB_DB_TYPE_LIVENODES,
- sizeof(o2hb_live_node_bitmap),
- O2NM_MAX_NODES,
- o2hb_live_node_bitmap);
- if (!o2hb_debug_livenodes) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_LIVENODES, o2hb_debug_dir,
+ &o2hb_db_livenodes, sizeof(*o2hb_db_livenodes),
+ O2HB_DB_TYPE_LIVENODES, sizeof(o2hb_live_node_bitmap),
+ O2NM_MAX_NODES, o2hb_live_node_bitmap);
- o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS,
- o2hb_debug_dir,
- &o2hb_db_liveregions,
- sizeof(*o2hb_db_liveregions),
- O2HB_DB_TYPE_LIVEREGIONS,
- sizeof(o2hb_live_region_bitmap),
- O2NM_MAX_REGIONS,
- o2hb_live_region_bitmap);
- if (!o2hb_debug_liveregions) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, o2hb_debug_dir,
+ &o2hb_db_liveregions, sizeof(*o2hb_db_liveregions),
+ O2HB_DB_TYPE_LIVEREGIONS,
+ sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS,
+ o2hb_live_region_bitmap);
- o2hb_debug_quorumregions =
- o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS,
- o2hb_debug_dir,
- &o2hb_db_quorumregions,
- sizeof(*o2hb_db_quorumregions),
- O2HB_DB_TYPE_QUORUMREGIONS,
- sizeof(o2hb_quorum_region_bitmap),
- O2NM_MAX_REGIONS,
- o2hb_quorum_region_bitmap);
- if (!o2hb_debug_quorumregions) {
- mlog_errno(ret);
- goto bail;
- }
-
- o2hb_debug_failedregions =
- o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS,
- o2hb_debug_dir,
- &o2hb_db_failedregions,
- sizeof(*o2hb_db_failedregions),
- O2HB_DB_TYPE_FAILEDREGIONS,
- sizeof(o2hb_failed_region_bitmap),
- O2NM_MAX_REGIONS,
- o2hb_failed_region_bitmap);
- if (!o2hb_debug_failedregions) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, o2hb_debug_dir,
+ &o2hb_db_quorumregions,
+ sizeof(*o2hb_db_quorumregions),
+ O2HB_DB_TYPE_QUORUMREGIONS,
+ sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS,
+ o2hb_quorum_region_bitmap);
- ret = 0;
-bail:
- if (ret)
- o2hb_exit();
-
- return ret;
+ o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, o2hb_debug_dir,
+ &o2hb_db_failedregions,
+ sizeof(*o2hb_db_failedregions),
+ O2HB_DB_TYPE_FAILEDREGIONS,
+ sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS,
+ o2hb_failed_region_bitmap);
}
-int o2hb_init(void)
+void o2hb_init(void)
{
int i;
@@ -1511,7 +1461,7 @@ int o2hb_init(void)
o2hb_dependent_users = 0;
- return o2hb_debug_init();
+ o2hb_debug_init();
}
/* if we're already in a callback then we're already serialized by the sem */
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index 7f37540ac4ab..beed31ea86cf 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -63,7 +63,7 @@ void o2hb_unregister_callback(const char *region_uuid,
void o2hb_fill_node_map(unsigned long *map,
unsigned bytes);
void o2hb_exit(void);
-int o2hb_init(void);
+void o2hb_init(void);
int o2hb_check_node_heartbeating_no_sem(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
void o2hb_stop_all_regions(void);
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 0784575f4c2a..02bf4a1774cc 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -38,10 +38,6 @@
#define SHOW_SOCK_STATS 1
static struct dentry *o2net_dentry;
-static struct dentry *sc_dentry;
-static struct dentry *nst_dentry;
-static struct dentry *stats_dentry;
-static struct dentry *nodes_dentry;
static DEFINE_SPINLOCK(o2net_debug_lock);
@@ -490,36 +486,23 @@ static const struct file_operations nodes_fops = {
void o2net_debugfs_exit(void)
{
- debugfs_remove(nodes_dentry);
- debugfs_remove(stats_dentry);
- debugfs_remove(sc_dentry);
- debugfs_remove(nst_dentry);
- debugfs_remove(o2net_dentry);
+ debugfs_remove_recursive(o2net_dentry);
}
-int o2net_debugfs_init(void)
+void o2net_debugfs_init(void)
{
umode_t mode = S_IFREG|S_IRUSR;
o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
- if (o2net_dentry)
- nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode,
- o2net_dentry, NULL, &nst_seq_fops);
- if (nst_dentry)
- sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode,
- o2net_dentry, NULL, &sc_seq_fops);
- if (sc_dentry)
- stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode,
- o2net_dentry, NULL, &stats_seq_fops);
- if (stats_dentry)
- nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode,
- o2net_dentry, NULL, &nodes_fops);
- if (nodes_dentry)
- return 0;
-
- o2net_debugfs_exit();
- mlog_errno(-ENOMEM);
- return -ENOMEM;
+
+ debugfs_create_file(NST_DEBUG_NAME, mode, o2net_dentry, NULL,
+ &nst_seq_fops);
+ debugfs_create_file(SC_DEBUG_NAME, mode, o2net_dentry, NULL,
+ &sc_seq_fops);
+ debugfs_create_file(STATS_DEBUG_NAME, mode, o2net_dentry, NULL,
+ &stats_seq_fops);
+ debugfs_create_file(NODES_DEBUG_NAME, mode, o2net_dentry, NULL,
+ &nodes_fops);
}
#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 2234f7fd1f7c..7a7640c59f3c 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -828,9 +828,7 @@ static int __init init_o2nm(void)
{
int ret = -1;
- ret = o2hb_init();
- if (ret)
- goto out;
+ o2hb_init();
ret = o2net_init();
if (ret)
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 3d5d4b2b1356..5c424a099280 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -76,7 +76,7 @@ static void o2quo_fence_self(void)
};
}
-/* Indicate that a timeout occurred on a hearbeat region write. The
+/* Indicate that a timeout occurred on a heartbeat region write. The
* other nodes in the cluster may consider us dead at that time so we
* want to "fence" ourselves so that we don't scribble on the disk
* after they think they've recovered us. This can't solve all
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index c599463d0694..48a3398f0bf5 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1762,7 +1762,7 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
(msecs_to_jiffies(o2net_reconnect_delay()) + 1);
if (node_num != o2nm_this_node()) {
- /* believe it or not, accept and node hearbeating testing
+ /* believe it or not, accept and node heartbeating testing
* can succeed for this node before we got here.. so
* only use set_nn_state to clear the persistent error
* if that hasn't already happened */
@@ -2129,8 +2129,7 @@ int o2net_init(void)
o2quo_init();
- if (o2net_debugfs_init())
- goto out;
+ o2net_debugfs_init();
o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index dd4242be3f1f..de87cbffd175 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -109,16 +109,15 @@ struct o2net_send_tracking;
struct o2net_sock_container;
#ifdef CONFIG_DEBUG_FS
-int o2net_debugfs_init(void);
+void o2net_debugfs_init(void);
void o2net_debugfs_exit(void);
void o2net_debug_add_nst(struct o2net_send_tracking *nst);
void o2net_debug_del_nst(struct o2net_send_tracking *nst);
void o2net_debug_add_sc(struct o2net_sock_container *sc);
void o2net_debug_del_sc(struct o2net_sock_container *sc);
#else
-static inline int o2net_debugfs_init(void)
+static inline void o2net_debugfs_init(void)
{
- return 0;
}
static inline void o2net_debugfs_exit(void)
{
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index c8af5bc9e980..a4b58ba99927 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -851,7 +851,7 @@ static const struct file_operations debug_state_fops = {
/* end - debug state funcs */
/* files in subroot */
-int dlm_debug_init(struct dlm_ctxt *dlm)
+void dlm_debug_init(struct dlm_ctxt *dlm)
{
struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
@@ -860,10 +860,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
S_IFREG|S_IRUSR,
dlm->dlm_debugfs_subroot,
dlm, &debug_state_fops);
- if (!dc->debug_state_dentry) {
- mlog_errno(-ENOMEM);
- goto bail;
- }
/* for dumping lockres */
dc->debug_lockres_dentry =
@@ -871,20 +867,12 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
S_IFREG|S_IRUSR,
dlm->dlm_debugfs_subroot,
dlm, &debug_lockres_fops);
- if (!dc->debug_lockres_dentry) {
- mlog_errno(-ENOMEM);
- goto bail;
- }
/* for dumping mles */
dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
S_IFREG|S_IRUSR,
dlm->dlm_debugfs_subroot,
dlm, &debug_mle_fops);
- if (!dc->debug_mle_dentry) {
- mlog_errno(-ENOMEM);
- goto bail;
- }
/* for dumping lockres on the purge list */
dc->debug_purgelist_dentry =
@@ -892,15 +880,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
S_IFREG|S_IRUSR,
dlm->dlm_debugfs_subroot,
dlm, &debug_purgelist_fops);
- if (!dc->debug_purgelist_dentry) {
- mlog_errno(-ENOMEM);
- goto bail;
- }
-
- return 0;
-
-bail:
- return -ENOMEM;
}
void dlm_debug_shutdown(struct dlm_ctxt *dlm)
@@ -920,24 +899,16 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
/* subroot - domain dir */
int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
{
- dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
- dlm_debugfs_root);
- if (!dlm->dlm_debugfs_subroot) {
- mlog_errno(-ENOMEM);
- goto bail;
- }
-
dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
GFP_KERNEL);
if (!dlm->dlm_debug_ctxt) {
mlog_errno(-ENOMEM);
- goto bail;
+ return -ENOMEM;
}
+ dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
+ dlm_debugfs_root);
return 0;
-bail:
- dlm_destroy_debugfs_subroot(dlm);
- return -ENOMEM;
}
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
@@ -946,14 +917,9 @@ void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
}
/* debugfs root */
-int dlm_create_debugfs_root(void)
+void dlm_create_debugfs_root(void)
{
dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL);
- if (!dlm_debugfs_root) {
- mlog_errno(-ENOMEM);
- return -ENOMEM;
- }
- return 0;
}
void dlm_destroy_debugfs_root(void)
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index 74d019694c7e..7d0c7c9013ce 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -28,20 +28,19 @@ struct debug_lockres {
struct dlm_lock_resource *dl_res;
};
-int dlm_debug_init(struct dlm_ctxt *dlm);
+void dlm_debug_init(struct dlm_ctxt *dlm);
void dlm_debug_shutdown(struct dlm_ctxt *dlm);
int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);
-int dlm_create_debugfs_root(void);
+void dlm_create_debugfs_root(void);
void dlm_destroy_debugfs_root(void);
#else
-static inline int dlm_debug_init(struct dlm_ctxt *dlm)
+static inline void dlm_debug_init(struct dlm_ctxt *dlm)
{
- return 0;
}
static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
{
@@ -53,9 +52,8 @@ static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
}
-static inline int dlm_create_debugfs_root(void)
+static inline void dlm_create_debugfs_root(void)
{
- return 0;
}
static inline void dlm_destroy_debugfs_root(void)
{
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9021e72e1f98..7338b5d4647c 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1881,11 +1881,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
goto bail;
}
- status = dlm_debug_init(dlm);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ dlm_debug_init(dlm);
snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
@@ -2346,9 +2342,7 @@ static int __init dlm_init(void)
goto error;
}
- status = dlm_create_debugfs_root();
- if (status)
- goto error;
+ dlm_create_debugfs_root();
return 0;
error:
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 810f841494ef..74b768ca1cd8 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2161,7 +2161,7 @@ put:
* think that $RECOVERY is currently mastered by a dead node. If so,
* we wait a short time to allow that node to get notified by its own
* heartbeat stack, then check again. All $RECOVERY lock resources
- * mastered by dead nodes are purged when the hearbeat callback is
+ * mastered by dead nodes are purged when the heartbeat callback is
* fired, so we can know for sure that it is safe to continue once
* the node returns a live node or no node. */
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index e22d6a115220..064ce5bbc3f6 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1109,7 +1109,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
{
u64 mig_cookie = be64_to_cpu(mres->mig_cookie);
int mres_total_locks = be32_to_cpu(mres->total_locks);
- int sz, ret = 0, status = 0;
+ int ret = 0, status = 0;
u8 orig_flags = mres->flags,
orig_master = mres->master;
@@ -1117,9 +1117,6 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
if (!mres->num_locks)
return 0;
- sz = sizeof(struct dlm_migratable_lockres) +
- (mres->num_locks * sizeof(struct dlm_migratable_lock));
-
/* add an all-done flag if we reached the last lock */
orig_flags = mres->flags;
BUG_ON(total_locks > mres_total_locks);
@@ -1133,7 +1130,8 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
/* send it */
ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
- sz, send_to, &status);
+ struct_size(mres, ml, mres->num_locks),
+ send_to, &status);
if (ret < 0) {
/* XXX: negative status is not handled.
* this will end up killing this node. */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b5fc5d3c7525..14207234fa3d 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -426,6 +426,7 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
res->l_lock_refresh = 0;
+ res->l_lock_wait = 0;
memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
}
@@ -460,6 +461,8 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
if (ret)
stats->ls_fail++;
+
+ stats->ls_last = ktime_to_us(ktime_get_real());
}
static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
@@ -467,6 +470,21 @@ static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
lockres->l_lock_refresh++;
}
+static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
+{
+ struct ocfs2_mask_waiter *mw;
+
+ if (list_empty(&lockres->l_mask_waiters)) {
+ lockres->l_lock_wait = 0;
+ return;
+ }
+
+ mw = list_first_entry(&lockres->l_mask_waiters,
+ struct ocfs2_mask_waiter, mw_item);
+ lockres->l_lock_wait =
+ ktime_to_us(ktime_mono_to_real(mw->mw_lock_start));
+}
+
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
mw->mw_lock_start = ktime_get();
@@ -482,6 +500,9 @@ static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}
+static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
+{
+}
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
@@ -875,6 +896,7 @@ static void lockres_set_flags(struct ocfs2_lock_res *lockres,
list_del_init(&mw->mw_item);
mw->mw_status = 0;
complete(&mw->mw_complete);
+ ocfs2_track_lock_wait(lockres);
}
}
static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
@@ -1386,6 +1408,7 @@ static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
mw->mw_mask = mask;
mw->mw_goal = goal;
+ ocfs2_track_lock_wait(lockres);
}
/* returns 0 if the mw that was removed was already satisfied, -EBUSY
@@ -1402,6 +1425,7 @@ static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
list_del_init(&mw->mw_item);
init_completion(&mw->mw_complete);
+ ocfs2_track_lock_wait(lockres);
}
return ret;
@@ -2989,6 +3013,8 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
kref_init(&dlm_debug->d_refcnt);
INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
dlm_debug->d_locking_state = NULL;
+ dlm_debug->d_locking_filter = NULL;
+ dlm_debug->d_filter_secs = 0;
out:
return dlm_debug;
}
@@ -3079,17 +3105,43 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
* - Lock stats printed
* New in version 3
* - Max time in lock stats is in usecs (instead of nsecs)
+ * New in version 4
+ * - Add last pr/ex unlock times and first lock wait time in usecs
*/
-#define OCFS2_DLM_DEBUG_STR_VERSION 3
+#define OCFS2_DLM_DEBUG_STR_VERSION 4
static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
{
int i;
char *lvb;
struct ocfs2_lock_res *lockres = v;
+#ifdef CONFIG_OCFS2_FS_STATS
+ u64 now, last;
+ struct ocfs2_dlm_debug *dlm_debug =
+ ((struct ocfs2_dlm_seq_priv *)m->private)->p_dlm_debug;
+#endif
if (!lockres)
return -EINVAL;
+#ifdef CONFIG_OCFS2_FS_STATS
+ if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) {
+ now = ktime_to_us(ktime_get_real());
+ if (lockres->l_lock_prmode.ls_last >
+ lockres->l_lock_exmode.ls_last)
+ last = lockres->l_lock_prmode.ls_last;
+ else
+ last = lockres->l_lock_exmode.ls_last;
+ /*
+ * Use d_filter_secs field to filter lock resources dump,
+ * the default d_filter_secs(0) value filters nothing,
+ * otherwise, only dump the last N seconds active lock
+ * resources.
+ */
+ if (div_u64(now - last, 1000000) > dlm_debug->d_filter_secs)
+ return 0;
+ }
+#endif
+
seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
@@ -3131,6 +3183,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
# define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max)
# define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max)
# define lock_refresh(_l) ((_l)->l_lock_refresh)
+# define lock_last_prmode(_l) ((_l)->l_lock_prmode.ls_last)
+# define lock_last_exmode(_l) ((_l)->l_lock_exmode.ls_last)
+# define lock_wait(_l) ((_l)->l_lock_wait)
#else
# define lock_num_prmode(_l) (0)
# define lock_num_exmode(_l) (0)
@@ -3141,6 +3196,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
# define lock_max_prmode(_l) (0)
# define lock_max_exmode(_l) (0)
# define lock_refresh(_l) (0)
+# define lock_last_prmode(_l) (0ULL)
+# define lock_last_exmode(_l) (0ULL)
+# define lock_wait(_l) (0ULL)
#endif
/* The following seq_print was added in version 2 of this output */
seq_printf(m, "%u\t"
@@ -3151,7 +3209,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
"%llu\t"
"%u\t"
"%u\t"
- "%u\t",
+ "%u\t"
+ "%llu\t"
+ "%llu\t"
+ "%llu\t",
lock_num_prmode(lockres),
lock_num_exmode(lockres),
lock_num_prmode_failed(lockres),
@@ -3160,7 +3221,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
lock_total_exmode(lockres),
lock_max_prmode(lockres),
lock_max_exmode(lockres),
- lock_refresh(lockres));
+ lock_refresh(lockres),
+ lock_last_prmode(lockres),
+ lock_last_exmode(lockres),
+ lock_wait(lockres));
/* End the line */
seq_printf(m, "\n");
@@ -3214,9 +3278,8 @@ static const struct file_operations ocfs2_dlm_debug_fops = {
.llseek = seq_lseek,
};
-static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
+static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
{
- int ret = 0;
struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
dlm_debug->d_locking_state = debugfs_create_file("locking_state",
@@ -3224,16 +3287,11 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
osb->osb_debug_root,
osb,
&ocfs2_dlm_debug_fops);
- if (!dlm_debug->d_locking_state) {
- ret = -EINVAL;
- mlog(ML_ERROR,
- "Unable to create locking state debugfs file.\n");
- goto out;
- }
- ocfs2_get_dlm_debug(dlm_debug);
-out:
- return ret;
+ dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter",
+ 0600,
+ osb->osb_debug_root,
+ &dlm_debug->d_filter_secs);
}
static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
@@ -3242,6 +3300,7 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
if (dlm_debug) {
debugfs_remove(dlm_debug->d_locking_state);
+ debugfs_remove(dlm_debug->d_locking_filter);
ocfs2_put_dlm_debug(dlm_debug);
}
}
@@ -3256,11 +3315,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
goto local;
}
- status = ocfs2_dlm_init_debug(osb);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_dlm_init_debug(osb);
/* launch downconvert thread */
osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
@@ -4352,7 +4407,6 @@ static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
static int ocfs2_downconvert_thread(void *arg)
{
- int status = 0;
struct ocfs2_super *osb = arg;
/* only quit once we've been asked to stop and there is no more
@@ -4370,7 +4424,7 @@ static int ocfs2_downconvert_thread(void *arg)
}
osb->dc_task = NULL;
- return status;
+ return 0;
}
void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 994726ada857..d6f7b299eb23 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -106,16 +106,9 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
flags = flags & mask;
flags |= oldflags & ~mask;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- */
- status = -EPERM;
- if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
- (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
- if (!capable(CAP_LINUX_IMMUTABLE))
- goto bail_unlock;
- }
+ status = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (status)
+ goto bail_unlock;
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index f03674afbd30..158e5af767fd 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -424,12 +424,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
bh = osb->local_alloc_bh;
alloc = (struct ocfs2_dinode *) bh->b_data;
- alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
+ alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS);
if (!alloc_copy) {
status = -ENOMEM;
goto out_commit;
}
- memcpy(alloc_copy, alloc, bh->b_size);
status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1272,13 +1271,12 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
* local alloc shutdown won't try to double free main bitmap
* bits. Make a copy so the sync function knows which bits to
* free. */
- alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
+ alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS);
if (!alloc_copy) {
status = -ENOMEM;
mlog_errno(status);
goto bail;
}
- memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
status = ocfs2_journal_access_di(handle,
INODE_CACHE(local_alloc_inode),
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index a4647a646f07..fddbbd60f434 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -150,6 +150,7 @@ struct ocfs2_lock_stats {
/* Storing max wait in usecs saves 24 bytes per inode */
u32 ls_max; /* Max wait in USEC */
+ u64 ls_last; /* Last unlock time in USEC */
};
#endif
@@ -191,6 +192,7 @@ struct ocfs2_lock_res {
#ifdef CONFIG_OCFS2_FS_STATS
struct ocfs2_lock_stats l_lock_prmode; /* PR mode stats */
u32 l_lock_refresh; /* Disk refreshes */
+ u64 l_lock_wait; /* First lock wait time */
struct ocfs2_lock_stats l_lock_exmode; /* EX mode stats */
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -222,6 +224,8 @@ struct ocfs2_orphan_scan {
struct ocfs2_dlm_debug {
struct kref d_refcnt;
struct dentry *d_locking_state;
+ struct dentry *d_locking_filter;
+ u32 d_filter_secs;
struct list_head d_lockres_tracking;
};
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a201f9780b35..8b2f39506648 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1079,33 +1079,15 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
ocfs2_debugfs_root);
- if (!osb->osb_debug_root) {
- status = -EINVAL;
- mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n");
- goto read_super_error;
- }
osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
osb->osb_debug_root,
osb,
&ocfs2_osb_debug_fops);
- if (!osb->osb_ctxt) {
- status = -EINVAL;
- mlog_errno(status);
- goto read_super_error;
- }
- if (ocfs2_meta_ecc(osb)) {
- status = ocfs2_blockcheck_stats_debugfs_install(
- &osb->osb_ecc_stats,
- osb->osb_debug_root);
- if (status) {
- mlog(ML_ERROR,
- "Unable to create blockcheck statistics "
- "files\n");
- goto read_super_error;
- }
- }
+ if (ocfs2_meta_ecc(osb))
+ ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
+ osb->osb_debug_root);
status = ocfs2_mount_volume(sb);
if (status < 0)
@@ -1592,11 +1574,6 @@ static int __init ocfs2_init(void)
goto out2;
ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
- if (!ocfs2_debugfs_root) {
- status = -ENOMEM;
- mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
- goto out3;
- }
ocfs2_set_locking_protocol();
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index a35c17017210..679a3c8e4fb3 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -357,11 +357,28 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb,
return ret;
}
+static int orangefs_getflags(struct inode *inode, unsigned long *uval)
+{
+ __u64 val = 0;
+ int ret;
+
+ ret = orangefs_inode_getxattr(inode,
+ "user.pvfs2.meta_hint",
+ &val, sizeof(val));
+ if (ret < 0 && ret != -ENODATA)
+ return ret;
+ else if (ret == -ENODATA)
+ val = 0;
+ *uval = val;
+ return 0;
+}
+
/*
* Perform a miscellaneous operation on a file.
*/
static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
+ struct inode *inode = file_inode(file);
int ret = -ENOTTY;
__u64 val = 0;
unsigned long uval;
@@ -375,20 +392,16 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar
* and append flags
*/
if (cmd == FS_IOC_GETFLAGS) {
- val = 0;
- ret = orangefs_inode_getxattr(file_inode(file),
- "user.pvfs2.meta_hint",
- &val, sizeof(val));
- if (ret < 0 && ret != -ENODATA)
+ ret = orangefs_getflags(inode, &uval);
+ if (ret)
return ret;
- else if (ret == -ENODATA)
- val = 0;
- uval = val;
gossip_debug(GOSSIP_FILE_DEBUG,
"orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n",
(unsigned long long)uval);
return put_user(uval, (int __user *)arg);
} else if (cmd == FS_IOC_SETFLAGS) {
+ unsigned long old_uval;
+
ret = 0;
if (get_user(uval, (int __user *)arg))
return -EFAULT;
@@ -404,11 +417,17 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar
gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n");
return -EINVAL;
}
+ ret = orangefs_getflags(inode, &old_uval);
+ if (ret)
+ return ret;
+ ret = vfs_ioc_setflags_prepare(inode, old_uval, uval);
+ if (ret)
+ return ret;
val = uval;
gossip_debug(GOSSIP_FILE_DEBUG,
"orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n",
(unsigned long long)val);
- ret = orangefs_inode_setxattr(file_inode(file),
+ ret = orangefs_inode_setxattr(inode,
"user.pvfs2.meta_hint",
&val, sizeof(val), 0);
}
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 87b1a6fce628..25543a966c48 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -64,7 +64,7 @@ struct client_debug_mask {
__u64 mask2;
};
-static int orangefs_kernel_debug_init(void);
+static void orangefs_kernel_debug_init(void);
static int orangefs_debug_help_open(struct inode *, struct file *);
static void *help_start(struct seq_file *, loff_t *);
@@ -99,7 +99,6 @@ static char *debug_help_string;
static char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
static char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
-static struct dentry *help_file_dentry;
static struct dentry *client_debug_dentry;
static struct dentry *debug_dir;
@@ -151,10 +150,8 @@ static DEFINE_MUTEX(orangefs_help_file_lock);
* initialize kmod debug operations, create orangefs debugfs dir and
* ORANGEFS_KMOD_DEBUG_HELP_FILE.
*/
-int orangefs_debugfs_init(int debug_mask)
+void orangefs_debugfs_init(int debug_mask)
{
- int rc = -ENOMEM;
-
/* convert input debug mask to a 64-bit unsigned integer */
orangefs_gossip_debug_mask = (unsigned long long)debug_mask;
@@ -183,37 +180,21 @@ int orangefs_debugfs_init(int debug_mask)
(unsigned long long)orangefs_gossip_debug_mask);
debug_dir = debugfs_create_dir("orangefs", NULL);
- if (!debug_dir) {
- pr_info("%s: debugfs_create_dir failed.\n", __func__);
- goto out;
- }
- help_file_dentry = debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE,
- 0444,
- debug_dir,
- debug_help_string,
- &debug_help_fops);
- if (!help_file_dentry) {
- pr_info("%s: debugfs_create_file failed.\n", __func__);
- goto out;
- }
+ debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE, 0444, debug_dir,
+ debug_help_string, &debug_help_fops);
orangefs_debug_disabled = 0;
- rc = orangefs_kernel_debug_init();
-
-out:
-
- return rc;
+ orangefs_kernel_debug_init();
}
/*
* initialize the kernel-debug file.
*/
-static int orangefs_kernel_debug_init(void)
+static void orangefs_kernel_debug_init(void)
{
int rc = -ENOMEM;
- struct dentry *ret;
char *k_buffer = NULL;
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
@@ -230,24 +211,11 @@ static int orangefs_kernel_debug_init(void)
pr_info("%s: overflow 1!\n", __func__);
}
- ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
- 0444,
- debug_dir,
- k_buffer,
- &kernel_debug_fops);
- if (!ret) {
- pr_info("%s: failed to create %s.\n",
- __func__,
- ORANGEFS_KMOD_DEBUG_FILE);
- goto out;
- }
-
- rc = 0;
+ debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer,
+ &kernel_debug_fops);
out:
-
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
- return rc;
}
@@ -353,12 +321,6 @@ static int orangefs_client_debug_init(void)
debug_dir,
c_buffer,
&kernel_debug_fops);
- if (!client_debug_dentry) {
- pr_info("%s: failed to create updated %s.\n",
- __func__,
- ORANGEFS_CLIENT_DEBUG_FILE);
- goto out;
- }
rc = 0;
diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h
index 51147f9ce3d6..502f6dedccde 100644
--- a/fs/orangefs/orangefs-debugfs.h
+++ b/fs/orangefs/orangefs-debugfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-int orangefs_debugfs_init(int);
+void orangefs_debugfs_init(int);
void orangefs_debugfs_cleanup(void);
int orangefs_prepare_debugfs_help_string(int);
int orangefs_debugfs_new_client_mask(void __user *);
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
index 4f2d7ee0d2d1..c010c1fddafc 100644
--- a/fs/orangefs/orangefs-mod.c
+++ b/fs/orangefs/orangefs-mod.c
@@ -129,9 +129,7 @@ static int __init orangefs_init(void)
if (ret)
goto cleanup_key_table;
- ret = orangefs_debugfs_init(module_parm_debug_mask);
- if (ret)
- goto debugfs_init_failed;
+ orangefs_debugfs_init(module_parm_debug_mask);
ret = orangefs_sysfs_init();
if (ret)
@@ -161,8 +159,6 @@ cleanup_device:
orangefs_dev_cleanup();
sysfs_init_failed:
-
-debugfs_init_failed:
orangefs_debugfs_cleanup();
cleanup_key_table:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c40fca98f2b7..77eb628ecc7f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -532,8 +532,7 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
unsigned long totalpages = totalram_pages() + total_swap_pages;
unsigned long points = 0;
- points = oom_badness(task, NULL, NULL, totalpages) *
- 1000 / totalpages;
+ points = oom_badness(task, totalpages) * 1000 / totalpages;
seq_printf(m, "%lu\n", points);
return 0;
@@ -1962,9 +1961,12 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
goto out;
if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
- down_read(&mm->mmap_sem);
- exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
- up_read(&mm->mmap_sem);
+ status = down_read_killable(&mm->mmap_sem);
+ if (!status) {
+ exact_vma_exists = !!find_exact_vma(mm, vm_start,
+ vm_end);
+ up_read(&mm->mmap_sem);
+ }
}
mmput(mm);
@@ -2010,8 +2012,11 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
+ rc = down_read_killable(&mm->mmap_sem);
+ if (rc)
+ goto out_mmput;
+
rc = -ENOENT;
- down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
*path = vma->vm_file->f_path;
@@ -2107,7 +2112,11 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
if (!mm)
goto out_put_task;
- down_read(&mm->mmap_sem);
+ result = ERR_PTR(-EINTR);
+ if (down_read_killable(&mm->mmap_sem))
+ goto out_put_mm;
+
+ result = ERR_PTR(-ENOENT);
vma = find_exact_vma(mm, vm_start, vm_end);
if (!vma)
goto out_no_vma;
@@ -2118,6 +2127,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
out_no_vma:
up_read(&mm->mmap_sem);
+out_put_mm:
mmput(mm);
out_put_task:
put_task_struct(task);
@@ -2160,7 +2170,12 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
mm = get_task_mm(task);
if (!mm)
goto out_put_task;
- down_read(&mm->mmap_sem);
+
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret) {
+ mmput(mm);
+ goto out_put_task;
+ }
nr_files = 0;
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 568d90e17c17..465ea0153b2a 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -120,7 +120,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "Committed_AS: ", committed);
seq_printf(m, "VmallocTotal: %8lu kB\n",
(unsigned long)VMALLOC_TOTAL >> 10);
- show_val_kb(m, "VmallocUsed: ", 0ul);
+ show_val_kb(m, "VmallocUsed: ", vmalloc_nr_pages());
show_val_kb(m, "VmallocChunk: ", 0ul);
show_val_kb(m, "Percpu: ", pcpu_nr_pages());
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 8b145e7b9661..522199e9525e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -211,7 +211,7 @@ static struct file_system_type proc_fs_type = {
.init_fs_context = proc_init_fs_context,
.parameters = &proc_fs_parameters,
.kill_sb = proc_kill_sb,
- .fs_flags = FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
};
void __init proc_root_init(void)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 01d4eb0e6bd1..818cedbed95f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -166,7 +166,11 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
if (!mm || !mmget_not_zero(mm))
return NULL;
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ mmput(mm);
+ return ERR_PTR(-EINTR);
+ }
+
hold_task_mempolicy(priv);
priv->tail_vma = get_gate_vma(mm);
@@ -417,17 +421,53 @@ struct mem_size_stats {
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
u64 pss;
+ u64 pss_anon;
+ u64 pss_file;
+ u64 pss_shmem;
u64 pss_locked;
u64 swap_pss;
bool check_shmem_swap;
};
+static void smaps_page_accumulate(struct mem_size_stats *mss,
+ struct page *page, unsigned long size, unsigned long pss,
+ bool dirty, bool locked, bool private)
+{
+ mss->pss += pss;
+
+ if (PageAnon(page))
+ mss->pss_anon += pss;
+ else if (PageSwapBacked(page))
+ mss->pss_shmem += pss;
+ else
+ mss->pss_file += pss;
+
+ if (locked)
+ mss->pss_locked += pss;
+
+ if (dirty || PageDirty(page)) {
+ if (private)
+ mss->private_dirty += size;
+ else
+ mss->shared_dirty += size;
+ } else {
+ if (private)
+ mss->private_clean += size;
+ else
+ mss->shared_clean += size;
+ }
+}
+
static void smaps_account(struct mem_size_stats *mss, struct page *page,
bool compound, bool young, bool dirty, bool locked)
{
int i, nr = compound ? 1 << compound_order(page) : 1;
unsigned long size = nr * PAGE_SIZE;
+ /*
+ * First accumulate quantities that depend only on |size| and the type
+ * of the compound page.
+ */
if (PageAnon(page)) {
mss->anonymous += size;
if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
@@ -440,42 +480,25 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
mss->referenced += size;
/*
+ * Then accumulate quantities that may depend on sharing, or that may
+ * differ page-by-page.
+ *
* page_count(page) == 1 guarantees the page is mapped exactly once.
* If any subpage of the compound page mapped with PTE it would elevate
* page_count().
*/
if (page_count(page) == 1) {
- if (dirty || PageDirty(page))
- mss->private_dirty += size;
- else
- mss->private_clean += size;
- mss->pss += (u64)size << PSS_SHIFT;
- if (locked)
- mss->pss_locked += (u64)size << PSS_SHIFT;
+ smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
+ locked, true);
return;
}
-
for (i = 0; i < nr; i++, page++) {
int mapcount = page_mapcount(page);
- unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
-
- if (mapcount >= 2) {
- if (dirty || PageDirty(page))
- mss->shared_dirty += PAGE_SIZE;
- else
- mss->shared_clean += PAGE_SIZE;
- mss->pss += pss / mapcount;
- if (locked)
- mss->pss_locked += pss / mapcount;
- } else {
- if (dirty || PageDirty(page))
- mss->private_dirty += PAGE_SIZE;
- else
- mss->private_clean += PAGE_SIZE;
- mss->pss += pss;
- if (locked)
- mss->pss_locked += pss;
- }
+ unsigned long pss = PAGE_SIZE << PSS_SHIFT;
+ if (mapcount >= 2)
+ pss /= mapcount;
+ smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
+ mapcount < 2);
}
}
@@ -754,10 +777,23 @@ static void smap_gather_stats(struct vm_area_struct *vma,
seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
/* Show the contents common for smaps and smaps_rollup */
-static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss)
+static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
+ bool rollup_mode)
{
SEQ_PUT_DEC("Rss: ", mss->resident);
SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
+ if (rollup_mode) {
+ /*
+ * These are meaningful only for smaps_rollup, otherwise two of
+ * them are zero, and the other one is the same as Pss.
+ */
+ SEQ_PUT_DEC(" kB\nPss_Anon: ",
+ mss->pss_anon >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nPss_File: ",
+ mss->pss_file >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nPss_Shmem: ",
+ mss->pss_shmem >> PSS_SHIFT);
+ }
SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
@@ -794,7 +830,7 @@ static int show_smap(struct seq_file *m, void *v)
SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
seq_puts(m, " kB\n");
- __show_smap(m, &mss);
+ __show_smap(m, &mss, false);
seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma));
@@ -828,7 +864,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
memset(&mss, 0, sizeof(mss));
- down_read(&mm->mmap_sem);
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret)
+ goto out_put_mm;
+
hold_task_mempolicy(priv);
for (vma = priv->mm->mmap; vma; vma = vma->vm_next) {
@@ -841,12 +880,13 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
seq_pad(m, ' ');
seq_puts(m, "[rollup]\n");
- __show_smap(m, &mss);
+ __show_smap(m, &mss, true);
release_task_mempolicy(priv);
up_read(&mm->mmap_sem);
- mmput(mm);
+out_put_mm:
+ mmput(mm);
out_put_task:
put_task_struct(priv->task);
priv->task = NULL;
@@ -1132,7 +1172,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
goto out_mm;
}
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ count = -EINTR;
+ goto out_mm;
+ }
tlb_gather_mmu(&tlb, mm, 0, -1);
if (type == CLEAR_REFS_SOFT_DIRTY) {
for (vma = mm->mmap; vma; vma = vma->vm_next) {
@@ -1279,7 +1322,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pm->show_pfn)
frame = pte_pfn(pte);
flags |= PM_PRESENT;
- page = _vm_normal_page(vma, addr, pte, true);
+ page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
@@ -1539,7 +1582,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
- down_read(&mm->mmap_sem);
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret)
+ goto out_free;
ret = walk_page_range(start_vaddr, end, &pagemap_walk);
up_read(&mm->mmap_sem);
start_vaddr = end;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 36bf0f2e102e..7907e6419e57 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -211,7 +211,11 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (!mm || !mmget_not_zero(mm))
return NULL;
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ mmput(mm);
+ return ERR_PTR(-EINTR);
+ }
+
/* start from the Nth VMA */
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
if (n-- == 0)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7bb96fdd38ad..57957c91c6df 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -166,7 +166,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
*/
ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0, false);
+ return read_from_oldmem(buf, count, ppos, 0, sev_active());
}
/*
@@ -174,7 +174,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
*/
ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0, sme_active());
+ return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active());
}
/*
@@ -374,7 +374,7 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
buflen);
start = m->paddr + *fpos - m->offset;
tmp = read_from_oldmem(buffer, tsz, &start,
- userbuf, sme_active());
+ userbuf, mem_encrypt_active());
if (tmp < 0)
return tmp;
buflen -= tsz;
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index 8e0a17ce3180..bfbfc2698070 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -112,27 +112,13 @@ static struct dentry *pstore_ftrace_dir;
void pstore_register_ftrace(void)
{
- struct dentry *file;
-
if (!psinfo->write)
return;
pstore_ftrace_dir = debugfs_create_dir("pstore", NULL);
- if (!pstore_ftrace_dir) {
- pr_err("%s: unable to create pstore directory\n", __func__);
- return;
- }
-
- file = debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir,
- NULL, &pstore_knob_fops);
- if (!file) {
- pr_err("%s: unable to create record_ftrace file\n", __func__);
- goto err_file;
- }
- return;
-err_file:
- debugfs_remove(pstore_ftrace_dir);
+ debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir, NULL,
+ &pstore_knob_fops);
}
void pstore_unregister_ftrace(void)
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 89a80b568a17..7fbe8f058220 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -318,22 +318,21 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
goto fail;
inode->i_mode = S_IFREG | 0444;
inode->i_fop = &pstore_file_operations;
- private = kzalloc(sizeof(*private), GFP_KERNEL);
- if (!private)
- goto fail_alloc;
- private->record = record;
-
scnprintf(name, sizeof(name), "%s-%s-%llu%s",
pstore_type_to_name(record->type),
record->psi->name, record->id,
record->compressed ? ".enc.z" : "");
+ private = kzalloc(sizeof(*private), GFP_KERNEL);
+ if (!private)
+ goto fail_inode;
+
dentry = d_alloc_name(root, name);
if (!dentry)
goto fail_private;
+ private->record = record;
inode->i_size = private->total_size = size;
-
inode->i_private = private;
if (record->time.tv_sec)
@@ -349,7 +348,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
fail_private:
free_pstore_private(private);
-fail_alloc:
+fail_inode:
iput(inode);
fail:
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 5b7709894415..2bb3468fc93a 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -655,6 +655,7 @@ static int ramoops_parse_dt(struct platform_device *pdev,
struct ramoops_platform_data *pdata)
{
struct device_node *of_node = pdev->dev.of_node;
+ struct device_node *parent_node;
struct resource *res;
u32 value;
int ret;
@@ -689,6 +690,26 @@ static int ramoops_parse_dt(struct platform_device *pdev,
#undef parse_size
+ /*
+ * Some old Chromebooks relied on the kernel setting the
+ * console_size and pmsg_size to the record size since that's
+ * what the downstream kernel did. These same Chromebooks had
+ * "ramoops" straight under the root node which isn't
+ * according to the current upstream bindings (though it was
+ * arguably acceptable under a prior version of the bindings).
+ * Let's make those old Chromebooks work by detecting that
+ * we're not a child of "reserved-memory" and mimicking the
+ * expected behavior.
+ */
+ parent_node = of_get_parent(of_node);
+ if (!of_node_name_eq(parent_node, "reserved-memory") &&
+ !pdata->console_size && !pdata->ftrace_size &&
+ !pdata->pmsg_size && !pdata->ecc_info.ecc_size) {
+ pdata->console_size = pdata->record_size;
+ pdata->pmsg_size = pdata->record_size;
+ }
+ of_node_put(parent_node);
+
return 0;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 58f15a083dd1..be9c471cdbc8 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -223,9 +223,9 @@ static void put_quota_format(struct quota_format_type *fmt)
/*
* Dquot List Management:
- * The quota code uses three lists for dquot management: the inuse_list,
- * free_dquots, and dquot_hash[] array. A single dquot structure may be
- * on all three lists, depending on its current state.
+ * The quota code uses four lists for dquot management: the inuse_list,
+ * free_dquots, dqi_dirty_list, and dquot_hash[] array. A single dquot
+ * structure may be on some of those lists, depending on its current state.
*
* All dquots are placed to the end of inuse_list when first created, and this
* list is used for invalidate operation, which must look at every dquot.
@@ -236,6 +236,11 @@ static void put_quota_format(struct quota_format_type *fmt)
* dqstats.free_dquots gives the number of dquots on the list. When
* dquot is invalidated it's completely released from memory.
*
+ * Dirty dquots are added to the dqi_dirty_list of quota_info when mark
+ * dirtied, and this list is searched when writing dirty dquots back to
+ * quota file. Note that some filesystems do dirty dquot tracking on their
+ * own (e.g. in a journal) and thus don't use dqi_dirty_list.
+ *
* Dquots with a specific identity (device, type and id) are placed on
* one of the dquot_hash[] hash chains. The provides an efficient search
* mechanism to locate a specific dquot.
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index fd5dd806f1b9..cb13fb76dbee 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -331,9 +331,9 @@ static int quota_state_to_flags(struct qc_state *state)
return flags;
}
-static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
+static int quota_getstate(struct super_block *sb, int type,
+ struct fs_quota_stat *fqs)
{
- int type;
struct qc_state state;
int ret;
@@ -349,14 +349,7 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
if (!fqs->qs_flags)
return -ENOSYS;
fqs->qs_incoredqs = state.s_incoredqs;
- /*
- * GETXSTATE quotactl has space for just one set of time limits so
- * report them for the first enabled quota type
- */
- for (type = 0; type < MAXQUOTAS; type++)
- if (state.s_state[type].flags & QCI_ACCT_ENABLED)
- break;
- BUG_ON(type == MAXQUOTAS);
+
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -391,22 +384,22 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
return 0;
}
-static int quota_getxstate(struct super_block *sb, void __user *addr)
+static int quota_getxstate(struct super_block *sb, int type, void __user *addr)
{
struct fs_quota_stat fqs;
int ret;
if (!sb->s_qcop->get_state)
return -ENOSYS;
- ret = quota_getstate(sb, &fqs);
+ ret = quota_getstate(sb, type, &fqs);
if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
return -EFAULT;
return ret;
}
-static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
+static int quota_getstatev(struct super_block *sb, int type,
+ struct fs_quota_statv *fqs)
{
- int type;
struct qc_state state;
int ret;
@@ -422,14 +415,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
if (!fqs->qs_flags)
return -ENOSYS;
fqs->qs_incoredqs = state.s_incoredqs;
- /*
- * GETXSTATV quotactl has space for just one set of time limits so
- * report them for the first enabled quota type
- */
- for (type = 0; type < MAXQUOTAS; type++)
- if (state.s_state[type].flags & QCI_ACCT_ENABLED)
- break;
- BUG_ON(type == MAXQUOTAS);
+
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -455,7 +441,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
return 0;
}
-static int quota_getxstatev(struct super_block *sb, void __user *addr)
+static int quota_getxstatev(struct super_block *sb, int type, void __user *addr)
{
struct fs_quota_statv fqs;
int ret;
@@ -474,7 +460,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr)
default:
return -EINVAL;
}
- ret = quota_getstatev(sb, &fqs);
+ ret = quota_getstatev(sb, type, &fqs);
if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
return -EFAULT;
return ret;
@@ -744,9 +730,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
case Q_XQUOTARM:
return quota_rmxquota(sb, addr);
case Q_XGETQSTAT:
- return quota_getxstate(sb, addr);
+ return quota_getxstate(sb, type, addr);
case Q_XGETQSTATV:
- return quota_getxstatev(sb, addr);
+ return quota_getxstatev(sb, type, addr);
case Q_XSETQLIM:
return quota_setxquota(sb, type, id, addr);
case Q_XGETQUOTA:
diff --git a/fs/read_write.c b/fs/read_write.c
index c543d965e288..1f5088dec566 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1565,6 +1565,58 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
}
#endif
+/**
+ * generic_copy_file_range - copy data between two files
+ * @file_in: file structure to read from
+ * @pos_in: file offset to read from
+ * @file_out: file structure to write data to
+ * @pos_out: file offset to write data to
+ * @len: amount of data to copy
+ * @flags: copy flags
+ *
+ * This is a generic filesystem helper to copy data from one file to another.
+ * It has no constraints on the source or destination file owners - the files
+ * can belong to different superblocks and different filesystem types. Short
+ * copies are allowed.
+ *
+ * This should be called from the @file_out filesystem, as per the
+ * ->copy_file_range() method.
+ *
+ * Returns the number of bytes copied or a negative error indicating the
+ * failure.
+ */
+
+ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+ len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
+}
+EXPORT_SYMBOL(generic_copy_file_range);
+
+static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ /*
+ * Although we now allow filesystems to handle cross sb copy, passing
+ * a file of the wrong filesystem type to filesystem driver can result
+ * in an attempt to dereference the wrong type of ->private_data, so
+ * avoid doing that until we really have a good reason. NFS defines
+ * several different file_system_type structures, but they all end up
+ * using the same ->copy_file_range() function pointer.
+ */
+ if (file_out->f_op->copy_file_range &&
+ file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
+ return file_out->f_op->copy_file_range(file_in, pos_in,
+ file_out, pos_out,
+ len, flags);
+
+ return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+ flags);
+}
+
/*
* copy_file_range() differs from regular file read and write in that it
* specifically allows return partial success. When it does so is up to
@@ -1574,17 +1626,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
ssize_t ret;
if (flags != 0)
return -EINVAL;
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
+ ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
+ flags);
+ if (unlikely(ret))
+ return ret;
ret = rw_verify_area(READ, file_in, &pos_in, len);
if (unlikely(ret))
@@ -1594,15 +1644,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
if (unlikely(ret))
return ret;
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
-
- /* this could be relaxed once a method supports cross-fs copies */
- if (inode_in->i_sb != inode_out->i_sb)
- return -EXDEV;
-
if (len == 0)
return 0;
@@ -1612,7 +1653,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* Try cloning first, this is supported by more file systems, and
* more efficient if both clone and copy are supported (e.g. NFS).
*/
- if (file_in->f_op->remap_file_range) {
+ if (file_in->f_op->remap_file_range &&
+ file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
loff_t cloned;
cloned = file_in->f_op->remap_file_range(file_in, pos_in,
@@ -1625,16 +1667,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
}
}
- if (file_out->f_op->copy_file_range) {
- ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
- pos_out, len, flags);
- if (ret != -EOPNOTSUPP)
- goto done;
- }
-
- ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
- len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
-
+ ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+ flags);
+ WARN_ON_ONCE(ret == -EOPNOTSUPP);
done:
if (ret > 0) {
fsnotify_access(file_in);
@@ -1951,25 +1986,10 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
return ret;
/* If can't alter the file contents, we're done. */
- if (!(remap_flags & REMAP_FILE_DEDUP)) {
- /* Update the timestamps, since we can alter file contents. */
- if (!(file_out->f_mode & FMODE_NOCMTIME)) {
- ret = file_update_time(file_out);
- if (ret)
- return ret;
- }
-
- /*
- * Clear the security bits if the process is not being run by
- * root. This keeps people from modifying setuid and setgid
- * binaries.
- */
- ret = file_remove_privs(file_out);
- if (ret)
- return ret;
- }
+ if (!(remap_flags & REMAP_FILE_DEDUP))
+ ret = file_modified(file_out);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(generic_remap_file_range_prep);
@@ -1977,29 +1997,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags)
{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
loff_t ret;
WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
/*
* FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
* the same mount. Practically, they only need to be on the same file
* system.
*/
- if (inode_in->i_sb != inode_out->i_sb)
+ if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
return -EXDEV;
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
+ ret = generic_file_rw_checks(file_in, file_out);
+ if (ret < 0)
+ return ret;
if (!file_in->f_op->remap_file_range)
return -EOPNOTSUPP;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index acbbaf7a0bb2..45e1a5d11af3 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -74,13 +74,11 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = -EPERM;
goto setflags_out;
}
- if (((flags ^ REISERFS_I(inode)->
- i_attrs) & (REISERFS_IMMUTABLE_FL |
- REISERFS_APPEND_FL))
- && !capable(CAP_LINUX_IMMUTABLE)) {
- err = -EPERM;
+ err = vfs_ioc_setflags_prepare(inode,
+ REISERFS_I(inode)->i_attrs,
+ flags);
+ if (err)
goto setflags_out;
- }
if ((flags & REISERFS_NOTAIL_FL) &&
S_ISREG(inode->i_mode)) {
int result;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index abe27ec43176..04f09689cd6d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -384,6 +384,17 @@ void seq_escape(struct seq_file *m, const char *s, const char *esc)
}
EXPORT_SYMBOL(seq_escape);
+void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz)
+{
+ char *buf;
+ size_t size = seq_get_buf(m, &buf);
+ int ret;
+
+ ret = string_escape_mem_ascii(src, isz, buf, size);
+ seq_commit(m, ret < size ? ret : -1);
+}
+EXPORT_SYMBOL(seq_escape_mem_ascii);
+
void seq_vprintf(struct seq_file *m, const char *f, va_list args)
{
int len;
diff --git a/fs/splice.c b/fs/splice.c
index 14cb602d9a2f..98412721f056 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1356,7 +1356,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- long error;
+ ssize_t error;
struct fd f;
int type;
@@ -1367,7 +1367,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
error = import_iovec(type, uiov, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
- if (!error) {
+ if (error >= 0) {
error = do_vmsplice(f.file, &iter, flags);
kfree(iov);
}
@@ -1382,7 +1382,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- long error;
+ ssize_t error;
struct fd f;
int type;
@@ -1393,7 +1393,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
error = compat_import_iovec(type, iov32, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
- if (!error) {
+ if (error >= 0) {
error = do_vmsplice(f.file, &iter, flags);
kfree(iov);
}
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 57038604d4a8..d41c21fef138 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -175,6 +175,26 @@ int sysfs_create_group(struct kobject *kobj,
}
EXPORT_SYMBOL_GPL(sysfs_create_group);
+static int internal_create_groups(struct kobject *kobj, int update,
+ const struct attribute_group **groups)
+{
+ int error = 0;
+ int i;
+
+ if (!groups)
+ return 0;
+
+ for (i = 0; groups[i]; i++) {
+ error = internal_create_group(kobj, update, groups[i]);
+ if (error) {
+ while (--i >= 0)
+ sysfs_remove_group(kobj, groups[i]);
+ break;
+ }
+ }
+ return error;
+}
+
/**
* sysfs_create_groups - given a directory kobject, create a bunch of attribute groups
* @kobj: The kobject to create the group on
@@ -191,25 +211,29 @@ EXPORT_SYMBOL_GPL(sysfs_create_group);
int sysfs_create_groups(struct kobject *kobj,
const struct attribute_group **groups)
{
- int error = 0;
- int i;
-
- if (!groups)
- return 0;
-
- for (i = 0; groups[i]; i++) {
- error = sysfs_create_group(kobj, groups[i]);
- if (error) {
- while (--i >= 0)
- sysfs_remove_group(kobj, groups[i]);
- break;
- }
- }
- return error;
+ return internal_create_groups(kobj, 0, groups);
}
EXPORT_SYMBOL_GPL(sysfs_create_groups);
/**
+ * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups
+ * @kobj: The kobject to update the group on
+ * @groups: The attribute groups to update, NULL terminated
+ *
+ * This function update a bunch of attribute groups. If an error occurs when
+ * updating a group, all previously updated groups will be removed together
+ * with already existing (not updated) attributes.
+ *
+ * Returns 0 on success or error code from sysfs_update_group on failure.
+ */
+int sysfs_update_groups(struct kobject *kobj,
+ const struct attribute_group **groups)
+{
+ return internal_create_groups(kobj, 1, groups);
+}
+EXPORT_SYMBOL_GPL(sysfs_update_groups);
+
+/**
* sysfs_update_group - given a directory kobject, update an attribute group
* @kobj: The kobject to update the group on
* @grp: The attribute group to update
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index a5bab190a297..eeeae0475da9 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -505,9 +505,12 @@ static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
switch (dentry->d_inode->i_mode & S_IFMT) {
case S_IFDIR:
ret = simple_rmdir(parent->d_inode, dentry);
+ if (!ret)
+ fsnotify_rmdir(parent->d_inode, dentry);
break;
default:
simple_unlink(parent->d_inode, dentry);
+ fsnotify_unlink(parent->d_inode, dentry);
break;
}
if (!ret)
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 06c35c64162b..69932bcfa920 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -6,8 +6,10 @@ config UBIFS_FS
select CRYPTO if UBIFS_FS_ADVANCED_COMPR
select CRYPTO if UBIFS_FS_LZO
select CRYPTO if UBIFS_FS_ZLIB
+ select CRYPTO if UBIFS_FS_ZSTD
select CRYPTO_LZO if UBIFS_FS_LZO
select CRYPTO_DEFLATE if UBIFS_FS_ZLIB
+ select CRYPTO_ZSTD if UBIFS_FS_ZSTD
select CRYPTO_HASH_INFO
select UBIFS_FS_XATTR if FS_ENCRYPTION
depends on MTD_UBI
@@ -38,6 +40,14 @@ config UBIFS_FS_ZLIB
help
Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
+config UBIFS_FS_ZSTD
+ bool "ZSTD compression support" if UBIFS_FS_ADVANCED_COMPR
+ depends on UBIFS_FS
+ default y
+ help
+ ZSTD compresses is a big win in speed over Zlib and
+ in compression ratio over LZO. Say 'Y' if unsure.
+
config UBIFS_ATIME_SUPPORT
bool "Access time support"
default n
@@ -77,8 +87,9 @@ config UBIFS_FS_SECURITY
config UBIFS_FS_AUTHENTICATION
bool "UBIFS authentication support"
- depends on KEYS
+ select KEYS
select CRYPTO_HMAC
+ select SYSTEM_DATA_VERIFICATION
help
Enable authentication support for UBIFS. This feature offers protection
against offline changes for both data and metadata of the filesystem.
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index 38718026ad0b..d9af2de9084a 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -10,10 +10,12 @@
*/
#include <linux/crypto.h>
+#include <linux/verification.h>
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <crypto/algapi.h>
#include <keys/user-type.h>
+#include <keys/asymmetric-type.h>
#include "ubifs.h"
@@ -199,6 +201,77 @@ int __ubifs_node_check_hash(const struct ubifs_info *c, const void *node,
}
/**
+ * ubifs_sb_verify_signature - verify the signature of a superblock
+ * @c: UBIFS file-system description object
+ * @sup: The superblock node
+ *
+ * To support offline signed images the superblock can be signed with a
+ * PKCS#7 signature. The signature is placed directly behind the superblock
+ * node in an ubifs_sig_node.
+ *
+ * Returns 0 when the signature can be successfully verified or a negative
+ * error code if not.
+ */
+int ubifs_sb_verify_signature(struct ubifs_info *c,
+ const struct ubifs_sb_node *sup)
+{
+ int err;
+ struct ubifs_scan_leb *sleb;
+ struct ubifs_scan_node *snod;
+ const struct ubifs_sig_node *signode;
+
+ sleb = ubifs_scan(c, UBIFS_SB_LNUM, UBIFS_SB_NODE_SZ, c->sbuf, 0);
+ if (IS_ERR(sleb)) {
+ err = PTR_ERR(sleb);
+ return err;
+ }
+
+ if (sleb->nodes_cnt == 0) {
+ ubifs_err(c, "Unable to find signature node");
+ err = -EINVAL;
+ goto out_destroy;
+ }
+
+ snod = list_first_entry(&sleb->nodes, struct ubifs_scan_node, list);
+
+ if (snod->type != UBIFS_SIG_NODE) {
+ ubifs_err(c, "Signature node is of wrong type");
+ err = -EINVAL;
+ goto out_destroy;
+ }
+
+ signode = snod->node;
+
+ if (le32_to_cpu(signode->len) > snod->len + sizeof(struct ubifs_sig_node)) {
+ ubifs_err(c, "invalid signature len %d", le32_to_cpu(signode->len));
+ err = -EINVAL;
+ goto out_destroy;
+ }
+
+ if (le32_to_cpu(signode->type) != UBIFS_SIGNATURE_TYPE_PKCS7) {
+ ubifs_err(c, "Signature type %d is not supported\n",
+ le32_to_cpu(signode->type));
+ err = -EINVAL;
+ goto out_destroy;
+ }
+
+ err = verify_pkcs7_signature(sup, sizeof(struct ubifs_sb_node),
+ signode->sig, le32_to_cpu(signode->len),
+ NULL, VERIFYING_UNSPECIFIED_SIGNATURE,
+ NULL, NULL);
+
+ if (err)
+ ubifs_err(c, "Failed to verify signature");
+ else
+ ubifs_msg(c, "Successfully verified super block signature");
+
+out_destroy:
+ ubifs_scan_destroy(sleb);
+
+ return err;
+}
+
+/**
* ubifs_init_authentication - initialize UBIFS authentication support
* @c: UBIFS file-system description object
*
@@ -227,7 +300,7 @@ int ubifs_init_authentication(struct ubifs_info *c)
snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
c->auth_hash_name);
- keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL, NULL);
+ keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL);
if (IS_ERR(keyring_key)) {
ubifs_err(c, "Failed to request key: %ld",
@@ -478,3 +551,16 @@ int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac)
return err;
return 0;
}
+
+/*
+ * ubifs_hmac_zero - test if a HMAC is zero
+ * @c: UBIFS file-system description object
+ * @hmac: the HMAC to test
+ *
+ * This function tests if a HMAC is zero and returns true if it is
+ * and false otherwise.
+ */
+bool ubifs_hmac_zero(struct ubifs_info *c, const u8 *hmac)
+{
+ return !memchr_inv(hmac, 0, c->hmac_desc_len);
+}
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 99c53ad11e93..3a92e6af69b2 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -59,6 +59,24 @@ static struct ubifs_compressor zlib_compr = {
};
#endif
+#ifdef CONFIG_UBIFS_FS_ZSTD
+static DEFINE_MUTEX(zstd_enc_mutex);
+static DEFINE_MUTEX(zstd_dec_mutex);
+
+static struct ubifs_compressor zstd_compr = {
+ .compr_type = UBIFS_COMPR_ZSTD,
+ .comp_mutex = &zstd_enc_mutex,
+ .decomp_mutex = &zstd_dec_mutex,
+ .name = "zstd",
+ .capi_name = "zstd",
+};
+#else
+static struct ubifs_compressor zstd_compr = {
+ .compr_type = UBIFS_COMPR_ZSTD,
+ .name = "zstd",
+};
+#endif
+
/* All UBIFS compressors */
struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
@@ -216,13 +234,19 @@ int __init ubifs_compressors_init(void)
if (err)
return err;
- err = compr_init(&zlib_compr);
+ err = compr_init(&zstd_compr);
if (err)
goto out_lzo;
+ err = compr_init(&zlib_compr);
+ if (err)
+ goto out_zstd;
+
ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr;
return 0;
+out_zstd:
+ compr_exit(&zstd_compr);
out_lzo:
compr_exit(&lzo_compr);
return err;
@@ -235,4 +259,5 @@ void ubifs_compressors_exit(void)
{
compr_exit(&lzo_compr);
compr_exit(&zlib_compr);
+ compr_exit(&zstd_compr);
}
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 4aaedf2d7f44..22be7aeb96c4 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -29,8 +29,8 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
{
struct ubifs_info *c = inode->i_sb->s_fs_info;
void *p = &dn->data;
- struct page *ret;
unsigned int pad_len = round_up(in_len, UBIFS_CIPHER_BLOCK_SIZE);
+ int err;
ubifs_assert(c, pad_len <= *out_len);
dn->compr_size = cpu_to_le16(in_len);
@@ -39,11 +39,11 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
if (pad_len != in_len)
memset(p + in_len, 0, pad_len - in_len);
- ret = fscrypt_encrypt_page(inode, virt_to_page(&dn->data), pad_len,
- offset_in_page(&dn->data), block, GFP_NOFS);
- if (IS_ERR(ret)) {
- ubifs_err(c, "fscrypt_encrypt_page failed: %ld", PTR_ERR(ret));
- return PTR_ERR(ret);
+ err = fscrypt_encrypt_block_inplace(inode, virt_to_page(p), pad_len,
+ offset_in_page(p), block, GFP_NOFS);
+ if (err) {
+ ubifs_err(c, "fscrypt_encrypt_block_inplace() failed: %d", err);
+ return err;
}
*out_len = pad_len;
@@ -64,10 +64,11 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn,
}
ubifs_assert(c, dlen <= UBIFS_BLOCK_SIZE);
- err = fscrypt_decrypt_page(inode, virt_to_page(&dn->data), dlen,
- offset_in_page(&dn->data), block);
+ err = fscrypt_decrypt_block_inplace(inode, virt_to_page(&dn->data),
+ dlen, offset_in_page(&dn->data),
+ block);
if (err) {
- ubifs_err(c, "fscrypt_decrypt_page failed: %i", err);
+ ubifs_err(c, "fscrypt_decrypt_block_inplace() failed: %d", err);
return err;
}
*out_len = clen;
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 138c5b07d803..a5f10d79e0dd 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2800,115 +2800,69 @@ static const struct file_operations dfs_fops = {
* dbg_debugfs_init_fs - initialize debugfs for UBIFS instance.
* @c: UBIFS file-system description object
*
- * This function creates all debugfs files for this instance of UBIFS. Returns
- * zero in case of success and a negative error code in case of failure.
+ * This function creates all debugfs files for this instance of UBIFS.
*
* Note, the only reason we have not merged this function with the
* 'ubifs_debugging_init()' function is because it is better to initialize
* debugfs interfaces at the very end of the mount process, and remove them at
* the very beginning of the mount process.
*/
-int dbg_debugfs_init_fs(struct ubifs_info *c)
+void dbg_debugfs_init_fs(struct ubifs_info *c)
{
- int err, n;
+ int n;
const char *fname;
- struct dentry *dent;
struct ubifs_debug_info *d = c->dbg;
- if (!IS_ENABLED(CONFIG_DEBUG_FS))
- return 0;
-
n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
c->vi.ubi_num, c->vi.vol_id);
if (n == UBIFS_DFS_DIR_LEN) {
/* The array size is too small */
fname = UBIFS_DFS_DIR_NAME;
- dent = ERR_PTR(-EINVAL);
- goto out;
+ return;
}
fname = d->dfs_dir_name;
- dent = debugfs_create_dir(fname, dfs_rootdir);
- if (IS_ERR_OR_NULL(dent))
- goto out;
- d->dfs_dir = dent;
+ d->dfs_dir = debugfs_create_dir(fname, dfs_rootdir);
fname = "dump_lprops";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_dump_lprops = dent;
+ d->dfs_dump_lprops = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
fname = "dump_budg";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_dump_budg = dent;
+ d->dfs_dump_budg = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
fname = "dump_tnc";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_dump_tnc = dent;
+ d->dfs_dump_tnc = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
fname = "chk_general";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_chk_gen = dent;
+ d->dfs_chk_gen = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ d->dfs_dir, c, &dfs_fops);
fname = "chk_index";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_chk_index = dent;
+ d->dfs_chk_index = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ d->dfs_dir, c, &dfs_fops);
fname = "chk_orphans";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_chk_orph = dent;
+ d->dfs_chk_orph = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ d->dfs_dir, c, &dfs_fops);
fname = "chk_lprops";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_chk_lprops = dent;
+ d->dfs_chk_lprops = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ d->dfs_dir, c, &dfs_fops);
fname = "chk_fs";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_chk_fs = dent;
+ d->dfs_chk_fs = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ d->dfs_dir, c, &dfs_fops);
fname = "tst_recovery";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_tst_rcvry = dent;
+ d->dfs_tst_rcvry = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ d->dfs_dir, c, &dfs_fops);
fname = "ro_error";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_ro_error = dent;
-
- return 0;
-
-out_remove:
- debugfs_remove_recursive(d->dfs_dir);
-out:
- err = dent ? PTR_ERR(dent) : -ENODEV;
- ubifs_err(c, "cannot create \"%s\" debugfs file or directory, error %d\n",
- fname, err);
- return err;
+ d->dfs_ro_error = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ d->dfs_dir, c, &dfs_fops);
}
/**
@@ -2917,8 +2871,7 @@ out:
*/
void dbg_debugfs_exit_fs(struct ubifs_info *c)
{
- if (IS_ENABLED(CONFIG_DEBUG_FS))
- debugfs_remove_recursive(c->dbg->dfs_dir);
+ debugfs_remove_recursive(c->dbg->dfs_dir);
}
struct ubifs_global_debug_info ubifs_dbg;
@@ -2994,75 +2947,38 @@ static const struct file_operations dfs_global_fops = {
*
* UBIFS uses debugfs file-system to expose various debugging knobs to
* user-space. This function creates "ubifs" directory in the debugfs
- * file-system. Returns zero in case of success and a negative error code in
- * case of failure.
+ * file-system.
*/
-int dbg_debugfs_init(void)
+void dbg_debugfs_init(void)
{
- int err;
const char *fname;
- struct dentry *dent;
-
- if (!IS_ENABLED(CONFIG_DEBUG_FS))
- return 0;
fname = "ubifs";
- dent = debugfs_create_dir(fname, NULL);
- if (IS_ERR_OR_NULL(dent))
- goto out;
- dfs_rootdir = dent;
+ dfs_rootdir = debugfs_create_dir(fname, NULL);
fname = "chk_general";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
- &dfs_global_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- dfs_chk_gen = dent;
+ dfs_chk_gen = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir,
+ NULL, &dfs_global_fops);
fname = "chk_index";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
- &dfs_global_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- dfs_chk_index = dent;
+ dfs_chk_index = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ dfs_rootdir, NULL, &dfs_global_fops);
fname = "chk_orphans";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
- &dfs_global_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- dfs_chk_orph = dent;
+ dfs_chk_orph = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ dfs_rootdir, NULL, &dfs_global_fops);
fname = "chk_lprops";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
- &dfs_global_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- dfs_chk_lprops = dent;
+ dfs_chk_lprops = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ dfs_rootdir, NULL, &dfs_global_fops);
fname = "chk_fs";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
- &dfs_global_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- dfs_chk_fs = dent;
+ dfs_chk_fs = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir,
+ NULL, &dfs_global_fops);
fname = "tst_recovery";
- dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
- &dfs_global_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- dfs_tst_rcvry = dent;
-
- return 0;
-
-out_remove:
- debugfs_remove_recursive(dfs_rootdir);
-out:
- err = dent ? PTR_ERR(dent) : -ENODEV;
- pr_err("UBIFS error (pid %d): cannot create \"%s\" debugfs file or directory, error %d\n",
- current->pid, fname, err);
- return err;
+ dfs_tst_rcvry = debugfs_create_file(fname, S_IRUSR | S_IWUSR,
+ dfs_rootdir, NULL, &dfs_global_fops);
}
/**
@@ -3070,8 +2986,7 @@ out:
*/
void dbg_debugfs_exit(void)
{
- if (IS_ENABLED(CONFIG_DEBUG_FS))
- debugfs_remove_recursive(dfs_rootdir);
+ debugfs_remove_recursive(dfs_rootdir);
}
void ubifs_assert_failed(struct ubifs_info *c, const char *expr,
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index eb26097b6f70..7763639a426b 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -297,9 +297,9 @@ int dbg_leb_unmap(struct ubifs_info *c, int lnum);
int dbg_leb_map(struct ubifs_info *c, int lnum);
/* Debugfs-related stuff */
-int dbg_debugfs_init(void);
+void dbg_debugfs_init(void);
void dbg_debugfs_exit(void);
-int dbg_debugfs_init_fs(struct ubifs_info *c);
+void dbg_debugfs_init_fs(struct ubifs_info *c);
void dbg_debugfs_exit_fs(struct ubifs_info *c);
#endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 4f1a397fda69..034ad14710d1 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -107,18 +107,11 @@ static int setflags(struct inode *inode, int flags)
if (err)
return err;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- */
mutex_lock(&ui->ui_mutex);
oldflags = ubifs2ioctl(ui->flags);
- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- err = -EPERM;
- goto out_unlock;
- }
- }
+ err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (err)
+ goto out_unlock;
ui->flags = ioctl2ubifs(flags);
ubifs_set_inode_flags(inode);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index cd85d7d4c515..b6ac9c4281ef 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -438,10 +438,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
*ltail_lnum = c->lhead_lnum;
c->lhead_offs += len;
- if (c->lhead_offs == c->leb_size) {
- c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
- c->lhead_offs = 0;
- }
+ ubifs_assert(c, c->lhead_offs < c->leb_size);
remove_buds(c);
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index b42a768709c0..52a85c01397e 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -48,6 +48,39 @@ int ubifs_compare_master_node(struct ubifs_info *c, void *m1, void *m2)
return 0;
}
+/* mst_node_check_hash - Check hash of a master node
+ * @c: UBIFS file-system description object
+ * @mst: The master node
+ * @expected: The expected hash of the master node
+ *
+ * This checks the hash of a master node against a given expected hash.
+ * Note that we have two master nodes on a UBIFS image which have different
+ * sequence numbers and consequently different CRCs. To be able to match
+ * both master nodes we exclude the common node header containing the sequence
+ * number and CRC from the hash.
+ *
+ * Returns 0 if the hashes are equal, a negative error code otherwise.
+ */
+static int mst_node_check_hash(const struct ubifs_info *c,
+ const struct ubifs_mst_node *mst,
+ const u8 *expected)
+{
+ u8 calc[UBIFS_MAX_HASH_LEN];
+ const void *node = mst;
+
+ SHASH_DESC_ON_STACK(shash, c->hash_tfm);
+
+ shash->tfm = c->hash_tfm;
+
+ crypto_shash_digest(shash, node + sizeof(struct ubifs_ch),
+ UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch), calc);
+
+ if (ubifs_check_hash(c, expected, calc))
+ return -EPERM;
+
+ return 0;
+}
+
/**
* scan_for_master - search the valid master node.
* @c: UBIFS file-system description object
@@ -102,14 +135,22 @@ static int scan_for_master(struct ubifs_info *c)
if (!ubifs_authenticated(c))
return 0;
- err = ubifs_node_verify_hmac(c, c->mst_node,
- sizeof(struct ubifs_mst_node),
- offsetof(struct ubifs_mst_node, hmac));
- if (err) {
- ubifs_err(c, "Failed to verify master node HMAC");
- return -EPERM;
+ if (ubifs_hmac_zero(c, c->mst_node->hmac)) {
+ err = mst_node_check_hash(c, c->mst_node,
+ c->sup_node->hash_mst);
+ if (err)
+ ubifs_err(c, "Failed to verify master node hash");
+ } else {
+ err = ubifs_node_verify_hmac(c, c->mst_node,
+ sizeof(struct ubifs_mst_node),
+ offsetof(struct ubifs_mst_node, hmac));
+ if (err)
+ ubifs_err(c, "Failed to verify master node HMAC");
}
+ if (err)
+ return -EPERM;
+
return 0;
out:
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index cb72688032cd..b52624e28fa1 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -126,25 +126,11 @@ static void __orphan_drop(struct ubifs_info *c, struct ubifs_orphan *o)
kfree(o);
}
-static void orphan_delete(struct ubifs_info *c, ino_t inum)
+static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph)
{
- struct ubifs_orphan *orph, *child_orph, *tmp_o;
-
- spin_lock(&c->orphan_lock);
-
- orph = lookup_orphan(c, inum);
- if (!orph) {
- spin_unlock(&c->orphan_lock);
- ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum);
- dump_stack();
-
- return;
- }
-
if (orph->del) {
spin_unlock(&c->orphan_lock);
- dbg_gen("deleted twice ino %lu",
- (unsigned long)inum);
+ dbg_gen("deleted twice ino %lu", orph->inum);
return;
}
@@ -153,19 +139,11 @@ static void orphan_delete(struct ubifs_info *c, ino_t inum)
orph->dnext = c->orph_dnext;
c->orph_dnext = orph;
spin_unlock(&c->orphan_lock);
- dbg_gen("delete later ino %lu",
- (unsigned long)inum);
+ dbg_gen("delete later ino %lu", orph->inum);
return;
}
- list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) {
- list_del(&child_orph->child_list);
- __orphan_drop(c, child_orph);
- }
-
__orphan_drop(c, orph);
-
- spin_unlock(&c->orphan_lock);
}
/**
@@ -223,7 +201,27 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
*/
void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
{
- orphan_delete(c, inum);
+ struct ubifs_orphan *orph, *child_orph, *tmp_o;
+
+ spin_lock(&c->orphan_lock);
+
+ orph = lookup_orphan(c, inum);
+ if (!orph) {
+ spin_unlock(&c->orphan_lock);
+ ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum);
+ dump_stack();
+
+ return;
+ }
+
+ list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) {
+ list_del(&child_orph->child_list);
+ orphan_delete(c, child_orph);
+ }
+
+ orphan_delete(c, orph);
+
+ spin_unlock(&c->orphan_lock);
}
/**
@@ -630,6 +628,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
{
struct ubifs_scan_node *snod;
struct ubifs_orph_node *orph;
+ struct ubifs_ino_node *ino = NULL;
unsigned long long cmt_no;
ino_t inum;
int i, n, err, first = 1;
@@ -676,23 +675,40 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
if (first)
first = 0;
+ ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
+ if (!ino)
+ return -ENOMEM;
+
n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
for (i = 0; i < n; i++) {
union ubifs_key key1, key2;
inum = le64_to_cpu(orph->inos[i]);
- dbg_rcvry("deleting orphaned inode %lu",
- (unsigned long)inum);
-
- lowest_ino_key(c, &key1, inum);
- highest_ino_key(c, &key2, inum);
- err = ubifs_tnc_remove_range(c, &key1, &key2);
+ ino_key_init(c, &key1, inum);
+ err = ubifs_tnc_lookup(c, &key1, ino);
if (err)
- return err;
+ goto out_free;
+
+ /*
+ * Check whether an inode can really get deleted.
+ * linkat() with O_TMPFILE allows rebirth of an inode.
+ */
+ if (ino->nlink == 0) {
+ dbg_rcvry("deleting orphaned inode %lu",
+ (unsigned long)inum);
+
+ lowest_ino_key(c, &key1, inum);
+ highest_ino_key(c, &key2, inum);
+
+ err = ubifs_tnc_remove_range(c, &key1, &key2);
+ if (err)
+ goto out_ro;
+ }
+
err = insert_dead_orphan(c, inum);
if (err)
- return err;
+ goto out_free;
}
*last_cmt_no = cmt_no;
@@ -704,7 +720,15 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
*last_flagged = 0;
}
- return 0;
+ err = 0;
+out_free:
+ kfree(ino);
+ return err;
+
+out_ro:
+ ubifs_ro_mode(c, err);
+ kfree(ino);
+ return err;
}
/**
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 3fc589881825..f116f7b3f9e5 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -818,7 +818,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
goto out_err;
}
if (cs_node->ch.node_type != UBIFS_CS_NODE) {
- ubifs_err(c, "Node a CS node, type is %d", cs_node->ch.node_type);
+ ubifs_err(c, "Not a CS node, type is %d", cs_node->ch.node_type);
goto out_err;
}
if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 12c2afdb5804..a551eb3e9b89 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -578,17 +578,26 @@ static int authenticate_sb_node(struct ubifs_info *c,
return -EINVAL;
}
- err = ubifs_hmac_wkm(c, hmac_wkm);
- if (err)
- return err;
-
- if (ubifs_check_hmac(c, hmac_wkm, sup->hmac_wkm)) {
- ubifs_err(c, "provided key does not fit");
- return -ENOKEY;
+ /*
+ * The super block node can either be authenticated by a HMAC or
+ * by a signature in a ubifs_sig_node directly following the
+ * super block node to support offline image creation.
+ */
+ if (ubifs_hmac_zero(c, sup->hmac)) {
+ err = ubifs_sb_verify_signature(c, sup);
+ } else {
+ err = ubifs_hmac_wkm(c, hmac_wkm);
+ if (err)
+ return err;
+ if (ubifs_check_hmac(c, hmac_wkm, sup->hmac_wkm)) {
+ ubifs_err(c, "provided key does not fit");
+ return -ENOKEY;
+ }
+ err = ubifs_node_verify_hmac(c, sup, sizeof(*sup),
+ offsetof(struct ubifs_sb_node,
+ hmac));
}
- err = ubifs_node_verify_hmac(c, sup, sizeof(*sup),
- offsetof(struct ubifs_sb_node, hmac));
if (err)
ubifs_err(c, "Failed to authenticate superblock: %d", err);
@@ -744,21 +753,16 @@ int ubifs_read_superblock(struct ubifs_info *c)
}
/* Automatically increase file system size to the maximum size */
- c->old_leb_cnt = c->leb_cnt;
if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
+ int old_leb_cnt = c->leb_cnt;
+
c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
- if (c->ro_mount)
- dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
- c->old_leb_cnt, c->leb_cnt);
- else {
- dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs",
- c->old_leb_cnt, c->leb_cnt);
- sup->leb_cnt = cpu_to_le32(c->leb_cnt);
- err = ubifs_write_sb_node(c, sup);
- if (err)
- goto out;
- c->old_leb_cnt = c->leb_cnt;
- }
+ sup->leb_cnt = cpu_to_le32(c->leb_cnt);
+
+ c->superblock_need_write = 1;
+
+ dbg_mnt("Auto resizing from %d LEBs to %d LEBs",
+ old_leb_cnt, c->leb_cnt);
}
c->log_bytes = (long long)c->log_lebs * c->leb_size;
@@ -916,9 +920,7 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
c->space_fixup = 0;
sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
- err = ubifs_write_sb_node(c, sup);
- if (err)
- return err;
+ c->superblock_need_write = 1;
ubifs_msg(c, "free space fixup complete");
return err;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 6cfc494050be..2c0803b0ac3a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -566,6 +566,8 @@ static int init_constants_early(struct ubifs_info *c)
c->ranges[UBIFS_AUTH_NODE].min_len = UBIFS_AUTH_NODE_SZ;
c->ranges[UBIFS_AUTH_NODE].max_len = UBIFS_AUTH_NODE_SZ +
UBIFS_MAX_HMAC_LEN;
+ c->ranges[UBIFS_SIG_NODE].min_len = UBIFS_SIG_NODE_SZ;
+ c->ranges[UBIFS_SIG_NODE].max_len = c->leb_size - UBIFS_SB_NODE_SZ;
c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ;
c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ;
@@ -1043,6 +1045,8 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
c->mount_opts.compr_type = UBIFS_COMPR_LZO;
else if (!strcmp(name, "zlib"))
c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
+ else if (!strcmp(name, "zstd"))
+ c->mount_opts.compr_type = UBIFS_COMPR_ZSTD;
else {
ubifs_err(c, "unknown compressor \"%s\"", name); //FIXME: is c ready?
kfree(name);
@@ -1296,8 +1300,7 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_free;
- sz = ALIGN(c->max_idx_node_sz, c->min_io_size);
- sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);
+ sz = ALIGN(c->max_idx_node_sz, c->min_io_size) * 2;
c->cbuf = kmalloc(sz, GFP_NOFS);
if (!c->cbuf) {
err = -ENOMEM;
@@ -1360,6 +1363,26 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_lpt;
}
+ /*
+ * Handle offline signed images: Now that the master node is
+ * written and its validation no longer depends on the hash
+ * in the superblock, we can update the offline signed
+ * superblock with a HMAC version,
+ */
+ if (ubifs_authenticated(c) && ubifs_hmac_zero(c, c->sup_node->hmac)) {
+ err = ubifs_hmac_wkm(c, c->sup_node->hmac_wkm);
+ if (err)
+ goto out_lpt;
+ c->superblock_need_write = 1;
+ }
+
+ if (!c->ro_mount && c->superblock_need_write) {
+ err = ubifs_write_sb_node(c, c->sup_node);
+ if (err)
+ goto out_lpt;
+ c->superblock_need_write = 0;
+ }
+
err = dbg_check_idx_size(c, c->bi.old_idx_sz);
if (err)
goto out_lpt;
@@ -1465,9 +1488,7 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_infos;
- err = dbg_debugfs_init_fs(c);
- if (err)
- goto out_infos;
+ dbg_debugfs_init_fs(c);
c->mounting = 0;
@@ -1644,15 +1665,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
if (err)
goto out;
- if (c->old_leb_cnt != c->leb_cnt) {
- struct ubifs_sb_node *sup = c->sup_node;
-
- sup->leb_cnt = cpu_to_le32(c->leb_cnt);
- err = ubifs_write_sb_node(c, sup);
- if (err)
- goto out;
- }
-
if (c->need_recovery) {
ubifs_msg(c, "completing deferred recovery");
err = ubifs_write_rcvrd_mst_node(c);
@@ -1684,6 +1696,16 @@ static int ubifs_remount_rw(struct ubifs_info *c)
goto out;
}
+ if (c->superblock_need_write) {
+ struct ubifs_sb_node *sup = c->sup_node;
+
+ err = ubifs_write_sb_node(c, sup);
+ if (err)
+ goto out;
+
+ c->superblock_need_write = 0;
+ }
+
c->ileb_buf = vmalloc(c->leb_size);
if (!c->ileb_buf) {
err = -ENOMEM;
@@ -2352,9 +2374,7 @@ static int __init ubifs_init(void)
if (err)
goto out_shrinker;
- err = dbg_debugfs_init();
- if (err)
- goto out_compr;
+ dbg_debugfs_init();
err = register_filesystem(&ubifs_fs_type);
if (err) {
@@ -2366,7 +2386,6 @@ static int __init ubifs_init(void)
out_dbg:
dbg_debugfs_exit();
-out_compr:
ubifs_compressors_exit();
out_shrinker:
unregister_shrinker(&ubifs_shrinker_info);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f5a823cb0e43..e8e7b0e9532e 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1158,8 +1158,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
* o exact match, i.e. the found zero-level znode contains key @key, then %1
* is returned and slot number of the matched branch is stored in @n;
* o not exact match, which means that zero-level znode does not contain
- * @key, then %0 is returned and slot number of the closest branch is stored
- * in @n;
+ * @key, then %0 is returned and slot number of the closest branch or %-1
+ * is stored in @n; In this case calling tnc_next() is mandatory.
* o @key is so small that it is even less than the lowest key of the
* leftmost zero-level node, then %0 is returned and %0 is stored in @n.
*
@@ -1882,13 +1882,19 @@ int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
static int search_dh_cookie(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_dent_node *dent, uint32_t cookie,
- struct ubifs_znode **zn, int *n)
+ struct ubifs_znode **zn, int *n, int exact)
{
int err;
struct ubifs_znode *znode = *zn;
struct ubifs_zbranch *zbr;
union ubifs_key *dkey;
+ if (!exact) {
+ err = tnc_next(c, &znode, n);
+ if (err)
+ return err;
+ }
+
for (;;) {
zbr = &znode->zbranch[*n];
dkey = &zbr->key;
@@ -1930,7 +1936,7 @@ static int do_lookup_dh(struct ubifs_info *c, const union ubifs_key *key,
if (unlikely(err < 0))
goto out_unlock;
- err = search_dh_cookie(c, key, dent, cookie, &znode, &n);
+ err = search_dh_cookie(c, key, dent, cookie, &znode, &n, err);
out_unlock:
mutex_unlock(&c->tnc_mutex);
@@ -2723,7 +2729,7 @@ int ubifs_tnc_remove_dh(struct ubifs_info *c, const union ubifs_key *key,
if (unlikely(err < 0))
goto out_free;
- err = search_dh_cookie(c, key, dent, cookie, &znode, &n);
+ err = search_dh_cookie(c, key, dent, cookie, &znode, &n, err);
if (err)
goto out_free;
}
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 86f0f2be116c..3c9792cbb6ff 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -275,6 +275,8 @@ enum {
#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node)
#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node)
#define UBIFS_AUTH_NODE_SZ sizeof(struct ubifs_auth_node)
+#define UBIFS_SIG_NODE_SZ sizeof(struct ubifs_sig_node)
+
/* Extended attribute entry nodes are identical to directory entry nodes */
#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ
/* Only this does not have to be multiple of 8 bytes */
@@ -301,6 +303,8 @@ enum {
*/
#define UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT "c"
+/* Type field in ubifs_sig_node */
+#define UBIFS_SIGNATURE_TYPE_PKCS7 1
/*
* On-flash inode flags.
@@ -336,12 +340,14 @@ enum {
* UBIFS_COMPR_NONE: no compression
* UBIFS_COMPR_LZO: LZO compression
* UBIFS_COMPR_ZLIB: ZLIB compression
+ * UBIFS_COMPR_ZSTD: ZSTD compression
* UBIFS_COMPR_TYPES_CNT: count of supported compression types
*/
enum {
UBIFS_COMPR_NONE,
UBIFS_COMPR_LZO,
UBIFS_COMPR_ZLIB,
+ UBIFS_COMPR_ZSTD,
UBIFS_COMPR_TYPES_CNT,
};
@@ -361,6 +367,7 @@ enum {
* UBIFS_CS_NODE: commit start node
* UBIFS_ORPH_NODE: orphan node
* UBIFS_AUTH_NODE: authentication node
+ * UBIFS_SIG_NODE: signature node
* UBIFS_NODE_TYPES_CNT: count of supported node types
*
* Note, we index arrays by these numbers, so keep them low and contiguous.
@@ -381,6 +388,7 @@ enum {
UBIFS_CS_NODE,
UBIFS_ORPH_NODE,
UBIFS_AUTH_NODE,
+ UBIFS_SIG_NODE,
UBIFS_NODE_TYPES_CNT,
};
@@ -638,6 +646,8 @@ struct ubifs_pad_node {
* @hmac_wkm: HMAC of a well known message (the string "UBIFS") as a convenience
* to the user to check if the correct key is passed.
* @hash_algo: The hash algo used for this filesystem (one of enum hash_algo)
+ * @hash_mst: hash of the master node, only valid for signed images in which the
+ * master node does not contain a hmac
*/
struct ubifs_sb_node {
struct ubifs_ch ch;
@@ -668,7 +678,8 @@ struct ubifs_sb_node {
__u8 hmac[UBIFS_MAX_HMAC_LEN];
__u8 hmac_wkm[UBIFS_MAX_HMAC_LEN];
__le16 hash_algo;
- __u8 padding2[3838];
+ __u8 hash_mst[UBIFS_MAX_HASH_LEN];
+ __u8 padding2[3774];
} __packed;
/**
@@ -771,6 +782,23 @@ struct ubifs_auth_node {
} __packed;
/**
+ * struct ubifs_sig_node - node for signing other nodes
+ * @ch: common header
+ * @type: type of the signature, currently only UBIFS_SIGNATURE_TYPE_PKCS7
+ * supported
+ * @len: The length of the signature data
+ * @padding: reserved for future, zeroes
+ * @sig: The signature data
+ */
+struct ubifs_sig_node {
+ struct ubifs_ch ch;
+ __le32 type;
+ __le32 len;
+ __u8 padding[32];
+ __u8 sig[];
+} __packed;
+
+/**
* struct ubifs_branch - key/reference/length branch
* @lnum: LEB number of the target node
* @offs: offset within @lnum
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 745b23e5b406..c55f212dcb75 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1104,7 +1104,6 @@ struct ubifs_debug_info;
* used to store indexing nodes (@leb_size - @max_idx_node_sz)
* @leb_cnt: count of logical eraseblocks
* @max_leb_cnt: maximum count of logical eraseblocks
- * @old_leb_cnt: count of logical eraseblocks before re-size
* @ro_media: the underlying UBI volume is read-only
* @ro_mount: the file-system was mounted as read-only
* @ro_error: UBIFS switched to R/O mode because an error happened
@@ -1295,6 +1294,7 @@ struct ubifs_info {
unsigned int rw_incompat:1;
unsigned int assert_action:2;
unsigned int authenticated:1;
+ unsigned int superblock_need_write:1;
struct mutex tnc_mutex;
struct ubifs_zbranch zroot;
@@ -1352,7 +1352,6 @@ struct ubifs_info {
int idx_leb_size;
int leb_cnt;
int max_leb_cnt;
- int old_leb_cnt;
unsigned int ro_media:1;
unsigned int ro_mount:1;
unsigned int ro_error:1;
@@ -1680,6 +1679,9 @@ static inline int ubifs_auth_node_sz(const struct ubifs_info *c)
else
return 0;
}
+int ubifs_sb_verify_signature(struct ubifs_info *c,
+ const struct ubifs_sb_node *sup);
+bool ubifs_hmac_zero(struct ubifs_info *c, const u8 *hmac);
int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index e7276932e433..9bb18311a22f 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -470,13 +470,15 @@ static struct buffer_head *udf_getblk(struct inode *inode, udf_pblk_t block,
return NULL;
}
-/* Extend the file by 'blocks' blocks, return the number of extents added */
+/* Extend the file with new blocks totaling 'new_block_bytes',
+ * return the number of extents added
+ */
static int udf_do_extend_file(struct inode *inode,
struct extent_position *last_pos,
struct kernel_long_ad *last_ext,
- sector_t blocks)
+ loff_t new_block_bytes)
{
- sector_t add;
+ uint32_t add;
int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
struct super_block *sb = inode->i_sb;
struct kernel_lb_addr prealloc_loc = {};
@@ -486,7 +488,7 @@ static int udf_do_extend_file(struct inode *inode,
/* The previous extent is fake and we should not extend by anything
* - there's nothing to do... */
- if (!blocks && fake)
+ if (!new_block_bytes && fake)
return 0;
iinfo = UDF_I(inode);
@@ -517,13 +519,12 @@ static int udf_do_extend_file(struct inode *inode,
/* Can we merge with the previous extent? */
if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
EXT_NOT_RECORDED_NOT_ALLOCATED) {
- add = ((1 << 30) - sb->s_blocksize -
- (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >>
- sb->s_blocksize_bits;
- if (add > blocks)
- add = blocks;
- blocks -= add;
- last_ext->extLength += add << sb->s_blocksize_bits;
+ add = (1 << 30) - sb->s_blocksize -
+ (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+ if (add > new_block_bytes)
+ add = new_block_bytes;
+ new_block_bytes -= add;
+ last_ext->extLength += add;
}
if (fake) {
@@ -544,28 +545,27 @@ static int udf_do_extend_file(struct inode *inode,
}
/* Managed to do everything necessary? */
- if (!blocks)
+ if (!new_block_bytes)
goto out;
/* All further extents will be NOT_RECORDED_NOT_ALLOCATED */
last_ext->extLocation.logicalBlockNum = 0;
last_ext->extLocation.partitionReferenceNum = 0;
- add = (1 << (30-sb->s_blocksize_bits)) - 1;
- last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (add << sb->s_blocksize_bits);
+ add = (1 << 30) - sb->s_blocksize;
+ last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | add;
/* Create enough extents to cover the whole hole */
- while (blocks > add) {
- blocks -= add;
+ while (new_block_bytes > add) {
+ new_block_bytes -= add;
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
return err;
count++;
}
- if (blocks) {
+ if (new_block_bytes) {
last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (blocks << sb->s_blocksize_bits);
+ new_block_bytes;
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
@@ -596,6 +596,24 @@ out:
return count;
}
+/* Extend the final block of the file to final_block_len bytes */
+static void udf_do_extend_final_block(struct inode *inode,
+ struct extent_position *last_pos,
+ struct kernel_long_ad *last_ext,
+ uint32_t final_block_len)
+{
+ struct super_block *sb = inode->i_sb;
+ uint32_t added_bytes;
+
+ added_bytes = final_block_len -
+ (last_ext->extLength & (sb->s_blocksize - 1));
+ last_ext->extLength += added_bytes;
+ UDF_I(inode)->i_lenExtents += added_bytes;
+
+ udf_write_aext(inode, last_pos, &last_ext->extLocation,
+ last_ext->extLength, 1);
+}
+
static int udf_extend_file(struct inode *inode, loff_t newsize)
{
@@ -605,10 +623,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
int8_t etype;
struct super_block *sb = inode->i_sb;
sector_t first_block = newsize >> sb->s_blocksize_bits, offset;
+ unsigned long partial_final_block;
int adsize;
struct udf_inode_info *iinfo = UDF_I(inode);
struct kernel_long_ad extent;
- int err;
+ int err = 0;
+ int within_final_block;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
adsize = sizeof(struct short_ad);
@@ -618,18 +638,8 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
BUG();
etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
+ within_final_block = (etype != -1);
- /* File has extent covering the new size (could happen when extending
- * inside a block)? */
- if (etype != -1)
- return 0;
- if (newsize & (sb->s_blocksize - 1))
- offset++;
- /* Extended file just to the boundary of the last file block? */
- if (offset == 0)
- return 0;
-
- /* Truncate is extending the file by 'offset' blocks */
if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
(epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
/* File has no extents at all or has empty last
@@ -643,7 +653,22 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
&extent.extLength, 0);
extent.extLength |= etype << 30;
}
- err = udf_do_extend_file(inode, &epos, &extent, offset);
+
+ partial_final_block = newsize & (sb->s_blocksize - 1);
+
+ /* File has extent covering the new size (could happen when extending
+ * inside a block)?
+ */
+ if (within_final_block) {
+ /* Extending file within the last file block */
+ udf_do_extend_final_block(inode, &epos, &extent,
+ partial_final_block);
+ } else {
+ loff_t add = ((loff_t)offset << sb->s_blocksize_bits) |
+ partial_final_block;
+ err = udf_do_extend_file(inode, &epos, &extent, add);
+ }
+
if (err < 0)
goto out;
err = 0;
@@ -745,6 +770,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* Are we beyond EOF? */
if (etype == -1) {
int ret;
+ loff_t hole_len;
isBeyondEOF = true;
if (count) {
if (c)
@@ -760,7 +786,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
startnum = (offset > 0);
}
/* Create extents for the hole between EOF and offset */
- ret = udf_do_extend_file(inode, &prev_epos, laarr, offset);
+ hole_len = (loff_t)offset << inode->i_blkbits;
+ ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len);
if (ret < 0) {
*err = ret;
newblock = 0;
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 6afab4fdce90..71ca4d047d65 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um,
}
EXPORT_SYMBOL(utf8_strncasecmp);
+/* String cf is expected to be a valid UTF-8 casefolded
+ * string.
+ */
+int utf8_strncasecmp_folded(const struct unicode_map *um,
+ const struct qstr *cf,
+ const struct qstr *s1)
+{
+ const struct utf8data *data = utf8nfdicf(um->version);
+ struct utf8cursor cur1;
+ int c1, c2;
+ int i = 0;
+
+ if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ return -EINVAL;
+
+ do {
+ c1 = utf8byte(&cur1);
+ c2 = cf->name[i++];
+ if (c1 < 0)
+ return -EINVAL;
+ if (c1 != c2)
+ return 1;
+ } while (c1);
+
+ return 0;
+}
+EXPORT_SYMBOL(utf8_strncasecmp_folded);
+
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 91831975363b..b74a47169297 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -62,6 +62,7 @@ xfs-y += xfs_aops.o \
xfs_attr_inactive.o \
xfs_attr_list.o \
xfs_bmap_util.o \
+ xfs_bio_io.o \
xfs_buf.o \
xfs_dir2_readdir.o \
xfs_discard.o \
@@ -80,9 +81,11 @@ xfs-y += xfs_aops.o \
xfs_iops.o \
xfs_inode.o \
xfs_itable.o \
+ xfs_iwalk.o \
xfs_message.o \
xfs_mount.o \
xfs_mru_cache.o \
+ xfs_pwork.o \
xfs_reflink.o \
xfs_stats.o \
xfs_super.o \
@@ -104,12 +107,8 @@ xfs-y += xfs_log.o \
xfs_rmap_item.o \
xfs_log_recover.o \
xfs_trans_ail.o \
- xfs_trans_bmap.o \
xfs_trans_buf.o \
- xfs_trans_extfree.o \
- xfs_trans_inode.o \
- xfs_trans_refcount.o \
- xfs_trans_rmap.o \
+ xfs_trans_inode.o
# optional features
xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index fdd9d6ede25c..16bb9a328678 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -3,12 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/mm.h>
#include <linux/sched/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include "kmem.h"
#include "xfs_message.h"
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 8e6b3ba81c03..267655acd426 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -124,4 +124,12 @@ kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags)
return kmem_zone_alloc(zone, flags | KM_ZERO);
}
+static inline struct page *
+kmem_to_page(void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ return vmalloc_to_page(addr);
+ return virt_to_page(addr);
+}
+
#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index b0c89f54d1bb..5de296b34ab1 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -10,6 +10,7 @@
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
@@ -44,6 +45,12 @@ xfs_get_aghdr_buf(
return bp;
}
+static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
+{
+ return mp->m_sb.sb_logstart > 0 &&
+ id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
+}
+
/*
* Generic btree root block init function
*/
@@ -53,40 +60,85 @@ xfs_btroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0);
+ xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
}
-/*
- * Alloc btree root block init functions
- */
+/* Finish initializing a free space btree. */
static void
-xfs_bnoroot_init(
+xfs_freesp_init_recs(
struct xfs_mount *mp,
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
struct xfs_alloc_rec *arec;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0);
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
+
+ if (is_log_ag(mp, id)) {
+ struct xfs_alloc_rec *nrec;
+ xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp,
+ mp->m_sb.sb_logstart);
+
+ ASSERT(start >= mp->m_ag_prealloc_blocks);
+ if (start != mp->m_ag_prealloc_blocks) {
+ /*
+ * Modify first record to pad stripe align of log
+ */
+ arec->ar_blockcount = cpu_to_be32(start -
+ mp->m_ag_prealloc_blocks);
+ nrec = arec + 1;
+
+ /*
+ * Insert second record at start of internal log
+ * which then gets trimmed.
+ */
+ nrec->ar_startblock = cpu_to_be32(
+ be32_to_cpu(arec->ar_startblock) +
+ be32_to_cpu(arec->ar_blockcount));
+ arec = nrec;
+ be16_add_cpu(&block->bb_numrecs, 1);
+ }
+ /*
+ * Change record start to after the internal log
+ */
+ be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
+ }
+
+ /*
+ * Calculate the record block count and check for the case where
+ * the log might have consumed all available space in the AG. If
+ * so, reset the record count to 0 to avoid exposure of an invalid
+ * record start block.
+ */
arec->ar_blockcount = cpu_to_be32(id->agsize -
be32_to_cpu(arec->ar_startblock));
+ if (!arec->ar_blockcount)
+ block->bb_numrecs = 0;
}
+/*
+ * Alloc btree root block init functions
+ */
static void
-xfs_cntroot_init(
+xfs_bnoroot_init(
struct xfs_mount *mp,
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- struct xfs_alloc_rec *arec;
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+ xfs_freesp_init_recs(mp, bp, id);
+}
- xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0);
- arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
- arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
- arec->ar_blockcount = cpu_to_be32(id->agsize -
- be32_to_cpu(arec->ar_startblock));
+static void
+xfs_cntroot_init(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ struct aghdr_init_data *id)
+{
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+ xfs_freesp_init_recs(mp, bp, id);
}
/*
@@ -101,7 +153,7 @@ xfs_rmaproot_init(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_rmap_rec *rrec;
- xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);
/*
* mark the AG header regions as static metadata The BNO
@@ -149,6 +201,18 @@ xfs_rmaproot_init(
rrec->rm_offset = 0;
be16_add_cpu(&block->bb_numrecs, 1);
}
+
+ /* account for the log space */
+ if (is_log_ag(mp, id)) {
+ rrec = XFS_RMAP_REC_ADDR(block,
+ be16_to_cpu(block->bb_numrecs) + 1);
+ rrec->rm_startblock = cpu_to_be32(
+ XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
+ rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
+ rrec->rm_offset = 0;
+ be16_add_cpu(&block->bb_numrecs, 1);
+ }
}
/*
@@ -209,6 +273,14 @@ xfs_agfblock_init(
agf->agf_refcount_level = cpu_to_be32(1);
agf->agf_refcount_blocks = cpu_to_be32(1);
}
+
+ if (is_log_ag(mp, id)) {
+ int64_t logblocks = mp->m_sb.sb_logblocks;
+
+ be32_add_cpu(&agf->agf_freeblks, -logblocks);
+ agf->agf_longest = cpu_to_be32(id->agsize -
+ XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
+ }
}
static void
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index e2ba2a3b63b2..87a9747f1d36 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -9,20 +9,12 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
#include "xfs_rmap_btree.h"
#include "xfs_btree.h"
#include "xfs_refcount_btree.h"
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index a9ff3cf82cce..372ad55631fc 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -13,7 +13,6 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc_btree.h"
@@ -21,7 +20,6 @@
#include "xfs_extent_busy.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
@@ -41,8 +39,6 @@ struct workqueue_struct *xfs_alloc_wq;
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
- xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
/*
* Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
@@ -555,7 +551,7 @@ static xfs_failaddr_t
xfs_agfl_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
int i;
@@ -596,7 +592,7 @@ static void
xfs_agfl_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
/*
@@ -621,7 +617,7 @@ static void
xfs_agfl_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
xfs_failaddr_t fa;
@@ -700,6 +696,107 @@ xfs_alloc_update_counters(
*/
/*
+ * Deal with the case where only small freespaces remain. Either return the
+ * contents of the last freespace record, or allocate space from the freelist if
+ * there is nothing in the tree.
+ */
+STATIC int /* error */
+xfs_alloc_ag_vextent_small(
+ struct xfs_alloc_arg *args, /* allocation argument structure */
+ struct xfs_btree_cur *ccur, /* optional by-size cursor */
+ xfs_agblock_t *fbnop, /* result block number */
+ xfs_extlen_t *flenp, /* result length */
+ int *stat) /* status: 0-freelist, 1-normal/none */
+{
+ int error = 0;
+ xfs_agblock_t fbno = NULLAGBLOCK;
+ xfs_extlen_t flen = 0;
+ int i = 0;
+
+ /*
+ * If a cntbt cursor is provided, try to allocate the largest record in
+ * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
+ * allocation. Make sure to respect minleft even when pulling from the
+ * freelist.
+ */
+ if (ccur)
+ error = xfs_btree_decrement(ccur, 0, &i);
+ if (error)
+ goto error;
+ if (i) {
+ error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
+ if (error)
+ goto error;
+ XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
+ goto out;
+ }
+
+ if (args->minlen != 1 || args->alignment != 1 ||
+ args->resv == XFS_AG_RESV_AGFL ||
+ (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
+ args->minleft))
+ goto out;
+
+ error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+ if (error)
+ goto error;
+ if (fbno == NULLAGBLOCK)
+ goto out;
+
+ xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
+ xfs_alloc_allow_busy_reuse(args->datatype));
+
+ if (xfs_alloc_is_userdata(args->datatype)) {
+ struct xfs_buf *bp;
+
+ bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
+ if (!bp) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
+ xfs_trans_binval(args->tp, bp);
+ }
+ *fbnop = args->agbno = fbno;
+ *flenp = args->len = 1;
+ XFS_WANT_CORRUPTED_GOTO(args->mp,
+ fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
+ error);
+ args->wasfromfl = 1;
+ trace_xfs_alloc_small_freelist(args);
+
+ /*
+ * If we're feeding an AGFL block to something that doesn't live in the
+ * free space, we need to clear out the OWN_AG rmap.
+ */
+ error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
+ &XFS_RMAP_OINFO_AG);
+ if (error)
+ goto error;
+
+ *stat = 0;
+ return 0;
+
+out:
+ /*
+ * Can't do the allocation, give up.
+ */
+ if (flen < args->minlen) {
+ args->agbno = NULLAGBLOCK;
+ trace_xfs_alloc_small_notenough(args);
+ flen = 0;
+ }
+ *fbnop = fbno;
+ *flenp = flen;
+ *stat = 1;
+ trace_xfs_alloc_small_done(args);
+ return 0;
+
+error:
+ trace_xfs_alloc_small_error(args);
+ return error;
+}
+
+/*
* Allocate a variable extent in the allocation group agno.
* Type and bno are used to determine where in the allocation group the
* extent will start.
@@ -1583,112 +1680,6 @@ out_nominleft:
}
/*
- * Deal with the case where only small freespaces remain.
- * Either return the contents of the last freespace record,
- * or allocate space from the freelist if there is nothing in the tree.
- */
-STATIC int /* error */
-xfs_alloc_ag_vextent_small(
- xfs_alloc_arg_t *args, /* allocation argument structure */
- xfs_btree_cur_t *ccur, /* by-size cursor */
- xfs_agblock_t *fbnop, /* result block number */
- xfs_extlen_t *flenp, /* result length */
- int *stat) /* status: 0-freelist, 1-normal/none */
-{
- int error;
- xfs_agblock_t fbno;
- xfs_extlen_t flen;
- int i;
-
- if ((error = xfs_btree_decrement(ccur, 0, &i)))
- goto error0;
- if (i) {
- if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- }
- /*
- * Nothing in the btree, try the freelist. Make sure
- * to respect minleft even when pulling from the
- * freelist.
- */
- else if (args->minlen == 1 && args->alignment == 1 &&
- args->resv != XFS_AG_RESV_AGFL &&
- (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
- > args->minleft)) {
- error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
- if (error)
- goto error0;
- if (fbno != NULLAGBLOCK) {
- xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
- xfs_alloc_allow_busy_reuse(args->datatype));
-
- if (xfs_alloc_is_userdata(args->datatype)) {
- xfs_buf_t *bp;
-
- bp = xfs_btree_get_bufs(args->mp, args->tp,
- args->agno, fbno, 0);
- if (!bp) {
- error = -EFSCORRUPTED;
- goto error0;
- }
- xfs_trans_binval(args->tp, bp);
- }
- args->len = 1;
- args->agbno = fbno;
- XFS_WANT_CORRUPTED_GOTO(args->mp,
- args->agbno + args->len <=
- be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
- error0);
- args->wasfromfl = 1;
- trace_xfs_alloc_small_freelist(args);
-
- /*
- * If we're feeding an AGFL block to something that
- * doesn't live in the free space, we need to clear
- * out the OWN_AG rmap.
- */
- error = xfs_rmap_free(args->tp, args->agbp, args->agno,
- fbno, 1, &XFS_RMAP_OINFO_AG);
- if (error)
- goto error0;
-
- *stat = 0;
- return 0;
- }
- /*
- * Nothing in the freelist.
- */
- else
- flen = 0;
- }
- /*
- * Can't allocate from the freelist for some reason.
- */
- else {
- fbno = NULLAGBLOCK;
- flen = 0;
- }
- /*
- * Can't do the allocation, give up.
- */
- if (flen < args->minlen) {
- args->agbno = NULLAGBLOCK;
- trace_xfs_alloc_small_notenough(args);
- flen = 0;
- }
- *fbnop = fbno;
- *flenp = flen;
- *stat = 1;
- trace_xfs_alloc_small_done(args);
- return 0;
-
-error0:
- trace_xfs_alloc_small_error(args);
- return error;
-}
-
-/*
* Free the extent starting at agno/bno for length.
*/
STATIC int
@@ -2095,7 +2086,7 @@ xfs_free_agfl_block(
if (error)
return error;
- bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0);
+ bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
if (!bp)
return -EFSCORRUPTED;
xfs_trans_binval(tp, bp);
@@ -2586,7 +2577,7 @@ static xfs_failaddr_t
xfs_agf_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
@@ -2644,7 +2635,7 @@ static void
xfs_agf_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2661,7 +2652,7 @@ static void
xfs_agf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
xfs_failaddr_t fa;
@@ -3146,7 +3137,7 @@ xfs_alloc_has_record(
/*
* Walk all the blocks in the AGFL. The @walk_fn can return any negative
- * error code or XFS_BTREE_QUERY_RANGE_ABORT.
+ * error code or XFS_ITER_*.
*/
int
xfs_agfl_walk(
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 9fe949f6055e..2a94543857a1 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -17,7 +17,6 @@
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
@@ -292,7 +291,7 @@ static xfs_failaddr_t
xfs_allocbt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index c441f41f14e8..d48fcf11cc35 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -9,23 +9,18 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr_remote.h"
-#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 3b0dce06e454..ff28ebf3b635 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -112,7 +112,13 @@ typedef struct xfs_attr_list_context {
struct xfs_inode *dp; /* inode */
struct attrlist_cursor_kern *cursor; /* position in list */
char *alist; /* output buffer */
- int seen_enough; /* T/F: seen enough of list? */
+
+ /*
+ * Abort attribute list iteration if non-zero. Can be used to pass
+ * error values to the xfs_attr_list caller.
+ */
+ int seen_enough;
+
ssize_t count; /* num used entries */
int dupcnt; /* count dup hashvals seen */
int bufsize; /* total buffer size */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 1f6e3965ff74..70eb941d02e4 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -10,14 +10,12 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_attr_sf.h"
@@ -27,7 +25,6 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
#include "xfs_dir2.h"
#include "xfs_log.h"
@@ -240,7 +237,7 @@ xfs_attr3_leaf_verify(
struct xfs_buf *bp)
{
struct xfs_attr3_icleaf_hdr ichdr;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_attr_leaf_entry *entries;
uint32_t end; /* must be 32bit - see below */
@@ -313,7 +310,7 @@ static void
xfs_attr3_leaf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
@@ -343,7 +340,7 @@ static void
xfs_attr3_leaf_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -865,7 +862,7 @@ xfs_attr_shortform_allfit(
struct xfs_attr3_icleaf_hdr leafhdr;
int bytes;
int i;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
@@ -1525,7 +1522,7 @@ xfs_attr_leaf_order(
{
struct xfs_attr3_icleaf_hdr ichdr1;
struct xfs_attr3_icleaf_hdr ichdr2;
- struct xfs_mount *mp = leaf1_bp->b_target->bt_mount;
+ struct xfs_mount *mp = leaf1_bp->b_mount;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
@@ -2568,7 +2565,7 @@ xfs_attr_leaf_lasthash(
{
struct xfs_attr3_icleaf_hdr ichdr;
struct xfs_attr_leaf_entry *entries;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
entries = xfs_attr3_leaf_entryp(bp->b_addr);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 65ff600a8067..4eb30d357045 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -16,18 +16,10 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_attr_remote.h"
-#include "xfs_trans_space.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_buf_item.h"
#include "xfs_error.h"
#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
@@ -111,7 +103,7 @@ __xfs_attr3_rmt_read_verify(
bool check_crc,
xfs_failaddr_t *failaddr)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
char *ptr;
int len;
xfs_daddr_t bno;
@@ -175,7 +167,7 @@ static void
xfs_attr3_rmt_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
int blksize = mp->m_attr_geo->blksize;
char *ptr;
@@ -535,7 +527,7 @@ xfs_attr_rmtval_set(
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
- bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
+ bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
if (!bp)
return -ENOMEM;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 40ce5f3094d1..7071ff98fdbc 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -5,7 +5,6 @@
*/
#include "xfs.h"
#include "xfs_log_format.h"
-#include "xfs_bit.h"
/*
* XFS bit manipulation routines, used in non-realtime code.
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 356ebd1cbe82..baf0b72c0a37 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -13,14 +13,10 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
@@ -32,7 +28,6 @@
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
#include "xfs_trace.h"
-#include "xfs_symlink.h"
#include "xfs_attr_leaf.h"
#include "xfs_filestream.h"
#include "xfs_rmap.h"
@@ -370,7 +365,7 @@ xfs_bmap_check_leaf_extents(
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
- error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
@@ -454,7 +449,7 @@ xfs_bmap_check_leaf_extents(
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
- error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
@@ -619,7 +614,7 @@ xfs_bmap_btree_to_extents(
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
xfs_btree_check_lptr(cur, cbno, 1));
#endif
- error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
+ error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
@@ -732,7 +727,7 @@ xfs_bmap_extents_to_btree(
cur->bc_private.b.allocated++;
ip->i_d.di_nblocks++;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
- abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+ abp = xfs_btree_get_bufl(mp, tp, args.fsbno);
if (!abp) {
error = -EFSCORRUPTED;
goto out_unreserve_dquot;
@@ -878,7 +873,7 @@ xfs_bmap_local_to_extents(
ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(args.len == 1);
tp->t_firstblock = args.fsbno;
- bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
+ bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno);
/*
* Initialize the block, copy the data and log the remote buffer.
@@ -1203,7 +1198,7 @@ xfs_iread_extents(
* pointer (leftmost) at each level.
*/
while (level-- > 0) {
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, tp, bno, &bp,
XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
if (error)
goto out;
@@ -1276,7 +1271,7 @@ xfs_iread_extents(
*/
if (bno == NULLFSBLOCK)
break;
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, tp, bno, &bp,
XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
if (error)
goto out;
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index aff82ed112c9..fbb18ba5d905 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -11,10 +11,8 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
@@ -22,7 +20,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_rmap.h"
/*
@@ -411,7 +408,7 @@ static xfs_failaddr_t
xfs_bmbt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
xfs_failaddr_t fa;
unsigned int level;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index bbdae2b4559f..f1048efa4268 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -11,16 +11,13 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
#include "xfs_btree.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_alloc.h"
#include "xfs_log.h"
@@ -276,7 +273,7 @@ xfs_btree_lblock_calc_crc(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_buf_log_item *bip = bp->b_log_item;
- if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
return;
if (bip)
block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -288,7 +285,7 @@ xfs_btree_lblock_verify_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
@@ -314,7 +311,7 @@ xfs_btree_sblock_calc_crc(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_buf_log_item *bip = bp->b_log_item;
- if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
return;
if (bip)
block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -326,7 +323,7 @@ xfs_btree_sblock_verify_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
@@ -691,14 +688,13 @@ xfs_buf_t * /* buffer for fsbno */
xfs_btree_get_bufl(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
- xfs_fsblock_t fsbno, /* file system block number */
- uint lock) /* lock flags for get_buf */
+ xfs_fsblock_t fsbno) /* file system block number */
{
xfs_daddr_t d; /* real disk block address */
ASSERT(fsbno != NULLFSBLOCK);
d = XFS_FSB_TO_DADDR(mp, fsbno);
- return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+ return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
}
/*
@@ -710,15 +706,14 @@ xfs_btree_get_bufs(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* allocation group block number */
- uint lock) /* lock flags for get_buf */
+ xfs_agblock_t agbno) /* allocation group block number */
{
xfs_daddr_t d; /* real disk block address */
ASSERT(agno != NULLAGNUMBER);
ASSERT(agbno != NULLAGBLOCK);
d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+ return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
}
/*
@@ -845,7 +840,6 @@ xfs_btree_read_bufl(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t fsbno, /* file system block number */
- uint lock, /* lock flags for read_buf */
struct xfs_buf **bpp, /* buffer for fsbno */
int refval, /* ref count value for buffer */
const struct xfs_buf_ops *ops)
@@ -858,7 +852,7 @@ xfs_btree_read_bufl(
return -EFSCORRUPTED;
d = XFS_FSB_TO_DADDR(mp, fsbno);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
- mp->m_bsize, lock, &bp, ops);
+ mp->m_bsize, 0, &bp, ops);
if (error)
return error;
if (bp)
@@ -1185,11 +1179,10 @@ xfs_btree_init_block(
xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
- __u64 owner,
- unsigned int flags)
+ __u64 owner)
{
xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
- btnum, level, numrecs, owner, flags);
+ btnum, level, numrecs, owner, 0);
}
STATIC void
@@ -1288,7 +1281,6 @@ STATIC int
xfs_btree_get_buf_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr,
- int flags,
struct xfs_btree_block **block,
struct xfs_buf **bpp)
{
@@ -1296,14 +1288,11 @@ xfs_btree_get_buf_block(
xfs_daddr_t d;
int error;
- /* need to sort out how callers deal with failures first */
- ASSERT(!(flags & XBF_TRYLOCK));
-
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
- mp->m_bsize, flags);
+ mp->m_bsize, 0);
if (!*bpp)
return -ENOMEM;
@@ -2706,7 +2695,7 @@ __xfs_btree_split(
XFS_BTREE_STATS_INC(cur, alloc);
/* Set up the new block as "right". */
- error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
+ error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
if (error)
goto error0;
@@ -2961,7 +2950,7 @@ xfs_btree_new_iroot(
XFS_BTREE_STATS_INC(cur, alloc);
/* Copy the root into a real block. */
- error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
+ error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
if (error)
goto error0;
@@ -3058,7 +3047,7 @@ xfs_btree_new_root(
XFS_BTREE_STATS_INC(cur, alloc);
/* Set up the new block. */
- error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
+ error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
if (error)
goto error0;
@@ -4433,7 +4422,7 @@ xfs_btree_lblock_v5hdr_verify(
struct xfs_buf *bp,
uint64_t owner)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -4454,7 +4443,7 @@ xfs_btree_lblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
/* numrecs verification */
@@ -4484,7 +4473,7 @@ xfs_failaddr_t
xfs_btree_sblock_v5hdr_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
@@ -4510,7 +4499,7 @@ xfs_btree_sblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
xfs_agblock_t agno;
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index e3b3e9dce5da..fa3cd8ab9aba 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -301,8 +301,7 @@ struct xfs_buf * /* buffer for fsbno */
xfs_btree_get_bufl(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t fsbno, /* file system block number */
- uint lock); /* lock flags for get_buf */
+ xfs_fsblock_t fsbno); /* file system block number */
/*
* Get a buffer for the block, return it with no data read.
@@ -313,8 +312,7 @@ xfs_btree_get_bufs(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* allocation group block number */
- uint lock); /* lock flags for get_buf */
+ xfs_agblock_t agbno); /* allocation group block number */
/*
* Check for the cursor referring to the last block at the given level.
@@ -345,7 +343,6 @@ xfs_btree_read_bufl(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t fsbno, /* file system block number */
- uint lock, /* lock flags for read_buf */
struct xfs_buf **bpp, /* buffer for fsbno */
int refval, /* ref count value for buffer */
const struct xfs_buf_ops *ops);
@@ -383,8 +380,7 @@ xfs_btree_init_block(
xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
- __u64 owner,
- unsigned int flags);
+ __u64 owner);
void
xfs_btree_init_block_int(
@@ -469,8 +465,8 @@ uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
/* return codes */
-#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */
-#define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */
+#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */
+#define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */
typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
union xfs_btree_rec *rec, void *priv);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e2737e2ac2ae..d1c77fd0815d 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -12,20 +12,14 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
#include "xfs_bmap.h"
-#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
@@ -126,7 +120,7 @@ xfs_da3_blkinfo_verify(
struct xfs_buf *bp,
struct xfs_da3_blkinfo *hdr3)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_da_blkinfo *hdr = &hdr3->hdr;
if (!xfs_verify_magic16(bp, hdr->magic))
@@ -148,7 +142,7 @@ static xfs_failaddr_t
xfs_da3_node_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_da_intnode *hdr = bp->b_addr;
struct xfs_da3_icnode_hdr ichdr;
const struct xfs_dir_ops *ops;
@@ -186,7 +180,7 @@ static void
xfs_da3_node_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index b39053dcb643..b1ae572496b6 100644
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -11,11 +11,8 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
/*
* Shortform directory ops
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 1c6bf2105939..eb2be2a6a25a 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -9,8 +9,6 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 156ce95c9c45..67840723edbb 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -5,20 +5,16 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index b7d6d78f4ce2..a6fb0cc2085e 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -6,22 +6,19 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_log.h"
/*
@@ -50,7 +47,7 @@ static xfs_failaddr_t
xfs_dir3_block_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_verify_magic(bp, hdr3->magic))
@@ -71,7 +68,7 @@ static void
xfs_dir3_block_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -88,7 +85,7 @@ static void
xfs_dir3_block_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index b7b9ce002cb9..2c79be4c3153 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -6,19 +6,16 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
#include "xfs_log.h"
static xfs_failaddr_t xfs_dir2_data_freefind_verify(
@@ -50,14 +47,13 @@ __xfs_dir3_data_check(
int i; /* leaf index */
int lastfree; /* last entry was unused */
xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */
- xfs_mount_t *mp; /* filesystem mount point */
+ struct xfs_mount *mp = bp->b_mount;
char *p; /* current data position */
int stale; /* count of stale leaves */
struct xfs_name name;
const struct xfs_dir_ops *ops;
struct xfs_da_geometry *geo;
- mp = bp->b_target->bt_mount;
geo = mp->m_dir_geo;
/*
@@ -249,7 +245,7 @@ static xfs_failaddr_t
xfs_dir3_data_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_verify_magic(bp, hdr3->magic))
@@ -298,7 +294,7 @@ static void
xfs_dir3_data_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -315,7 +311,7 @@ static void
xfs_dir3_data_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 9c2a0a13ed61..a53e4585a2f3 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -6,12 +6,11 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_dir2.h"
@@ -20,8 +19,6 @@
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
-#include "xfs_log.h"
/*
* Local function declarations.
@@ -144,7 +141,7 @@ static xfs_failaddr_t
xfs_dir3_leaf_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir2_leaf *leaf = bp->b_addr;
xfs_failaddr_t fa;
@@ -159,7 +156,7 @@ static void
xfs_dir3_leaf_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -176,7 +173,7 @@ static void
xfs_dir3_leaf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 16731d2d684b..afcc6642690a 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -6,12 +6,11 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_dir2.h"
@@ -20,7 +19,6 @@
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
#include "xfs_log.h"
/*
@@ -84,7 +82,7 @@ static xfs_failaddr_t
xfs_dir3_free_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir2_free_hdr *hdr = bp->b_addr;
if (!xfs_verify_magic(bp, hdr->magic))
@@ -110,7 +108,7 @@ static void
xfs_dir3_free_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -127,7 +125,7 @@ static void
xfs_dir3_free_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 585dfdb7b6b6..033589257f54 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -5,16 +5,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 88fa11071f9f..e8bd688a4073 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -16,8 +16,6 @@
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_error.h"
-#include "xfs_cksum.h"
-#include "xfs_trace.h"
int
xfs_calc_dquots_per_chunk(
@@ -224,7 +222,7 @@ static xfs_failaddr_t
xfs_dquot_buf_verify_struct(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
return xfs_dquot_buf_verify(mp, bp, false);
}
@@ -233,7 +231,7 @@ static void
xfs_dquot_buf_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (!xfs_dquot_buf_verify_crc(mp, bp, false))
return;
@@ -250,7 +248,7 @@ static void
xfs_dquot_buf_readahead_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (!xfs_dquot_buf_verify_crc(mp, bp, true) ||
xfs_dquot_buf_verify(mp, bp, true) != NULL) {
@@ -268,7 +266,7 @@ static void
xfs_dquot_buf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_dquot_buf_verify(mp, bp, false);
}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9bb3c48843ec..c968b60cee15 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1071,7 +1071,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1)
#define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog
#define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog
-#define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log
+#define XFS_INO_AGINO_BITS(mp) ((mp)->m_ino_geo.agino_log)
#define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log
#define XFS_INO_BITS(mp) \
XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index e7382c780ed7..52d03a3a02a4 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -97,7 +97,7 @@ struct getbmapx {
* For use by backup and restore programs to set the XFS on-disk inode
* fields di_dmevmask and di_dmstate. These must be set to exactly and
* only values previously obtained via xfs_bulkstat! (Specifically the
- * xfs_bstat_t fields bs_dmevmask and bs_dmstate.)
+ * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
*/
#ifndef HAVE_FSDMIDATA
struct fsdmidata {
@@ -328,7 +328,7 @@ typedef struct xfs_bstime {
__s32 tv_nsec; /* and nanoseconds */
} xfs_bstime_t;
-typedef struct xfs_bstat {
+struct xfs_bstat {
__u64 bs_ino; /* inode number */
__u16 bs_mode; /* type and mode */
__u16 bs_nlink; /* number of links */
@@ -356,7 +356,53 @@ typedef struct xfs_bstat {
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
-} xfs_bstat_t;
+};
+
+/* New bulkstat structure that reports v5 features and fixes padding issues */
+struct xfs_bulkstat {
+ uint64_t bs_ino; /* inode number */
+ uint64_t bs_size; /* file size */
+
+ uint64_t bs_blocks; /* number of blocks */
+ uint64_t bs_xflags; /* extended flags */
+
+ uint64_t bs_atime; /* access time, seconds */
+ uint64_t bs_mtime; /* modify time, seconds */
+
+ uint64_t bs_ctime; /* inode change time, seconds */
+ uint64_t bs_btime; /* creation time, seconds */
+
+ uint32_t bs_gen; /* generation count */
+ uint32_t bs_uid; /* user id */
+ uint32_t bs_gid; /* group id */
+ uint32_t bs_projectid; /* project id */
+
+ uint32_t bs_atime_nsec; /* access time, nanoseconds */
+ uint32_t bs_mtime_nsec; /* modify time, nanoseconds */
+ uint32_t bs_ctime_nsec; /* inode change time, nanoseconds */
+ uint32_t bs_btime_nsec; /* creation time, nanoseconds */
+
+ uint32_t bs_blksize; /* block size */
+ uint32_t bs_rdev; /* device value */
+ uint32_t bs_cowextsize_blks; /* cow extent size hint, blocks */
+ uint32_t bs_extsize_blks; /* extent size hint, blocks */
+
+ uint32_t bs_nlink; /* number of links */
+ uint32_t bs_extents; /* number of extents */
+ uint32_t bs_aextents; /* attribute number of extents */
+ uint16_t bs_version; /* structure version */
+ uint16_t bs_forkoff; /* inode fork offset in bytes */
+
+ uint16_t bs_sick; /* sick inode metadata */
+ uint16_t bs_checked; /* checked inode metadata */
+ uint16_t bs_mode; /* type and mode */
+ uint16_t bs_pad2; /* zeroed */
+
+ uint64_t bs_pad[7]; /* zeroed */
+};
+
+#define XFS_BULKSTAT_VERSION_V1 (1)
+#define XFS_BULKSTAT_VERSION_V5 (5)
/* bs_sick flags */
#define XFS_BS_SICK_INODE (1 << 0) /* inode core */
@@ -374,7 +420,7 @@ typedef struct xfs_bstat {
* to retain compatibility with "old" filesystems).
*/
static inline uint32_t
-bstat_get_projid(struct xfs_bstat *bs)
+bstat_get_projid(const struct xfs_bstat *bs)
{
return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
}
@@ -382,23 +428,79 @@ bstat_get_projid(struct xfs_bstat *bs)
/*
* The user-level BulkStat Request interface structure.
*/
-typedef struct xfs_fsop_bulkreq {
+struct xfs_fsop_bulkreq {
__u64 __user *lastip; /* last inode # pointer */
__s32 icount; /* count of entries in buffer */
void __user *ubuffer;/* user buffer for inode desc. */
__s32 __user *ocount; /* output count pointer */
-} xfs_fsop_bulkreq_t;
-
+};
/*
* Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS).
*/
-typedef struct xfs_inogrp {
+struct xfs_inogrp {
__u64 xi_startino; /* starting inode number */
__s32 xi_alloccount; /* # bits set in allocmask */
__u64 xi_allocmask; /* mask of allocated inodes */
-} xfs_inogrp_t;
+};
+/* New inumbers structure that reports v5 features and fixes padding issues */
+struct xfs_inumbers {
+ uint64_t xi_startino; /* starting inode number */
+ uint64_t xi_allocmask; /* mask of allocated inodes */
+ uint8_t xi_alloccount; /* # bits set in allocmask */
+ uint8_t xi_version; /* version */
+ uint8_t xi_padding[6]; /* zero */
+};
+
+#define XFS_INUMBERS_VERSION_V1 (1)
+#define XFS_INUMBERS_VERSION_V5 (5)
+
+/* Header for bulk inode requests. */
+struct xfs_bulk_ireq {
+ uint64_t ino; /* I/O: start with this inode */
+ uint32_t flags; /* I/O: operation flags */
+ uint32_t icount; /* I: count of entries in buffer */
+ uint32_t ocount; /* O: count of entries filled out */
+ uint32_t agno; /* I: see comment for IREQ_AGNO */
+ uint64_t reserved[5]; /* must be zero */
+};
+
+/*
+ * Only return results from the specified @agno. If @ino is zero, start
+ * with the first inode of @agno.
+ */
+#define XFS_BULK_IREQ_AGNO (1 << 0)
+
+/*
+ * Return bulkstat information for a single inode, where @ino value is a
+ * special value, not a literal inode number. See the XFS_BULK_IREQ_SPECIAL_*
+ * values below. Not compatible with XFS_BULK_IREQ_AGNO.
+ */
+#define XFS_BULK_IREQ_SPECIAL (1 << 1)
+
+#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \
+ XFS_BULK_IREQ_SPECIAL)
+
+/* Operate on the root directory inode. */
+#define XFS_BULK_IREQ_SPECIAL_ROOT (1)
+
+/*
+ * ioctl structures for v5 bulkstat and inumbers requests
+ */
+struct xfs_bulkstat_req {
+ struct xfs_bulk_ireq hdr;
+ struct xfs_bulkstat bulkstat[];
+};
+#define XFS_BULKSTAT_REQ_SIZE(nr) (sizeof(struct xfs_bulkstat_req) + \
+ (nr) * sizeof(struct xfs_bulkstat))
+
+struct xfs_inumbers_req {
+ struct xfs_bulk_ireq hdr;
+ struct xfs_inumbers inumbers[];
+};
+#define XFS_INUMBERS_REQ_SIZE(nr) (sizeof(struct xfs_inumbers_req) + \
+ (nr) * sizeof(struct xfs_inumbers))
/*
* Error injection.
@@ -529,7 +631,7 @@ typedef struct xfs_swapext
xfs_off_t sx_offset; /* offset into file */
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
- xfs_bstat_t sx_stat; /* stat of target b4 copy */
+ struct xfs_bstat sx_stat; /* stat of target b4 copy */
} xfs_swapext_t;
/*
@@ -701,6 +803,8 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4)
#define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t)
#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom)
+#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
+#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 49ddfeac19f2..272005ac8c88 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -185,6 +185,6 @@ xfs_inode_is_healthy(struct xfs_inode *ip)
void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
-void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs);
+void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
#endif /* __XFS_HEALTH_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index fe9898875097..04377ab75863 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -12,17 +12,14 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_icreate_item.h"
@@ -31,20 +28,6 @@
#include "xfs_log.h"
#include "xfs_rmap.h"
-
-/*
- * Allocation group level functions.
- */
-int
-xfs_ialloc_cluster_alignment(
- struct xfs_mount *mp)
-{
- if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
- return mp->m_sb.sb_inoalignmt;
- return 1;
-}
-
/*
* Lookup a record by ino in the btree given by cur.
*/
@@ -299,7 +282,7 @@ xfs_ialloc_inode_init(
* sizes, manipulate the inodes in buffers which are multiples of the
* blocks size.
*/
- nbufs = length / mp->m_blocks_per_cluster;
+ nbufs = length / M_IGEO(mp)->blocks_per_cluster;
/*
* Figure out what version number to use in the inodes we create. If
@@ -343,9 +326,10 @@ xfs_ialloc_inode_init(
* Get the block.
*/
d = XFS_AGB_TO_DADDR(mp, agno, agbno +
- (j * mp->m_blocks_per_cluster));
+ (j * M_IGEO(mp)->blocks_per_cluster));
fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
- mp->m_bsize * mp->m_blocks_per_cluster,
+ mp->m_bsize *
+ M_IGEO(mp)->blocks_per_cluster,
XBF_UNMAPPED);
if (!fbuf)
return -ENOMEM;
@@ -353,7 +337,7 @@ xfs_ialloc_inode_init(
/* Initialize the inode buffers and log them appropriately. */
fbuf->b_ops = &xfs_inode_buf_ops;
xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
- for (i = 0; i < mp->m_inodes_per_cluster; i++) {
+ for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
int ioffset = i << mp->m_sb.sb_inodelog;
uint isize = xfs_dinode_size(version);
@@ -616,24 +600,26 @@ error:
* Allocate new inodes in the allocation group specified by agbp.
* Return 0 for success, else error code.
*/
-STATIC int /* error code or 0 */
+STATIC int
xfs_ialloc_ag_alloc(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *agbp, /* alloc group buffer */
- int *alloc)
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ int *alloc)
{
- xfs_agi_t *agi; /* allocation group header */
- xfs_alloc_arg_t args; /* allocation argument structure */
- xfs_agnumber_t agno;
- int error;
- xfs_agino_t newino; /* new first inode's number */
- xfs_agino_t newlen; /* new number of inodes */
- int isaligned = 0; /* inode allocation at stripe unit */
- /* boundary */
- uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */
+ struct xfs_agi *agi;
+ struct xfs_alloc_arg args;
+ xfs_agnumber_t agno;
+ int error;
+ xfs_agino_t newino; /* new first inode's number */
+ xfs_agino_t newlen; /* new number of inodes */
+ int isaligned = 0; /* inode allocation at stripe */
+ /* unit boundary */
+ /* init. to full chunk */
+ uint16_t allocmask = (uint16_t) -1;
struct xfs_inobt_rec_incore rec;
- struct xfs_perag *pag;
- int do_sparse = 0;
+ struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp);
+ int do_sparse = 0;
memset(&args, 0, sizeof(args));
args.tp = tp;
@@ -644,7 +630,7 @@ xfs_ialloc_ag_alloc(
#ifdef DEBUG
/* randomly do sparse inode allocations */
if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
- args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
+ igeo->ialloc_min_blks < igeo->ialloc_blks)
do_sparse = prandom_u32() & 1;
#endif
@@ -652,12 +638,12 @@ xfs_ialloc_ag_alloc(
* Locking will ensure that we don't have two callers in here
* at one time.
*/
- newlen = args.mp->m_ialloc_inos;
- if (args.mp->m_maxicount &&
+ newlen = igeo->ialloc_inos;
+ if (igeo->maxicount &&
percpu_counter_read_positive(&args.mp->m_icount) + newlen >
- args.mp->m_maxicount)
+ igeo->maxicount)
return -ENOSPC;
- args.minlen = args.maxlen = args.mp->m_ialloc_blks;
+ args.minlen = args.maxlen = igeo->ialloc_blks;
/*
* First try to allocate inodes contiguous with the last-allocated
* chunk of inodes. If the filesystem is striped, this will fill
@@ -667,7 +653,7 @@ xfs_ialloc_ag_alloc(
newino = be32_to_cpu(agi->agi_newino);
agno = be32_to_cpu(agi->agi_seqno);
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
- args.mp->m_ialloc_blks;
+ igeo->ialloc_blks;
if (do_sparse)
goto sparse_alloc;
if (likely(newino != NULLAGINO &&
@@ -690,10 +676,10 @@ xfs_ialloc_ag_alloc(
* but not to use them in the actual exact allocation.
*/
args.alignment = 1;
- args.minalignslop = args.mp->m_cluster_align - 1;
+ args.minalignslop = igeo->cluster_align - 1;
/* Allow space for the inode btree to split. */
- args.minleft = args.mp->m_in_maxlevels - 1;
+ args.minleft = igeo->inobt_maxlevels - 1;
if ((error = xfs_alloc_vextent(&args)))
return error;
@@ -720,12 +706,12 @@ xfs_ialloc_ag_alloc(
* pieces, so don't need alignment anyway.
*/
isaligned = 0;
- if (args.mp->m_sinoalign) {
+ if (igeo->ialloc_align) {
ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
args.alignment = args.mp->m_dalign;
isaligned = 1;
} else
- args.alignment = args.mp->m_cluster_align;
+ args.alignment = igeo->cluster_align;
/*
* Need to figure out where to allocate the inode blocks.
* Ideally they should be spaced out through the a.g.
@@ -741,7 +727,7 @@ xfs_ialloc_ag_alloc(
/*
* Allow space for the inode btree to split.
*/
- args.minleft = args.mp->m_in_maxlevels - 1;
+ args.minleft = igeo->inobt_maxlevels - 1;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
@@ -754,7 +740,7 @@ xfs_ialloc_ag_alloc(
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.agbno = be32_to_cpu(agi->agi_root);
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
- args.alignment = args.mp->m_cluster_align;
+ args.alignment = igeo->cluster_align;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
@@ -764,7 +750,7 @@ xfs_ialloc_ag_alloc(
* the sparse allocation length is smaller than a full chunk.
*/
if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
- args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
+ igeo->ialloc_min_blks < igeo->ialloc_blks &&
args.fsbno == NULLFSBLOCK) {
sparse_alloc:
args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -773,7 +759,7 @@ sparse_alloc:
args.alignment = args.mp->m_sb.sb_spino_align;
args.prod = 1;
- args.minlen = args.mp->m_ialloc_min_blks;
+ args.minlen = igeo->ialloc_min_blks;
args.maxlen = args.minlen;
/*
@@ -789,7 +775,7 @@ sparse_alloc:
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
args.mp->m_sb.sb_inoalignmt) -
- args.mp->m_ialloc_blks;
+ igeo->ialloc_blks;
error = xfs_alloc_vextent(&args);
if (error)
@@ -1006,7 +992,7 @@ xfs_ialloc_ag_select(
* space needed for alignment of inode chunks when checking the
* longest contiguous free space in the AG - this prevents us
* from getting ENOSPC because we have free space larger than
- * m_ialloc_blks but alignment constraints prevent us from using
+ * ialloc_blks but alignment constraints prevent us from using
* it.
*
* If we can't find an AG with space for full alignment slack to
@@ -1015,9 +1001,9 @@ xfs_ialloc_ag_select(
* if we fail allocation due to alignment issues then it is most
* likely a real ENOSPC condition.
*/
- ineed = mp->m_ialloc_min_blks;
+ ineed = M_IGEO(mp)->ialloc_min_blks;
if (flags && ineed > 1)
- ineed += mp->m_cluster_align;
+ ineed += M_IGEO(mp)->cluster_align;
longest = pag->pagf_longest;
if (!longest)
longest = pag->pagf_flcount > 0;
@@ -1703,6 +1689,7 @@ xfs_dialloc(
int noroom = 0;
xfs_agnumber_t start_agno;
struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
int okalloc = 1;
if (*IO_agbp) {
@@ -1733,9 +1720,9 @@ xfs_dialloc(
* Read rough value of mp->m_icount by percpu_counter_read_positive,
* which will sacrifice the preciseness but improve the performance.
*/
- if (mp->m_maxicount &&
- percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
- > mp->m_maxicount) {
+ if (igeo->maxicount &&
+ percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
+ > igeo->maxicount) {
noroom = 1;
okalloc = 0;
}
@@ -1852,7 +1839,8 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
- mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES);
+ M_IGEO(mp)->ialloc_blks,
+ &XFS_RMAP_OINFO_INODES);
return;
}
@@ -2261,7 +2249,7 @@ xfs_imap_lookup(
/* check that the returned record contains the required inode */
if (rec.ir_startino > agino ||
- rec.ir_startino + mp->m_ialloc_inos <= agino)
+ rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino)
return -EINVAL;
/* for untrusted inodes check it is allocated first */
@@ -2352,7 +2340,7 @@ xfs_imap(
* If the inode cluster size is the same as the blocksize or
* smaller we get to the buffer by simple arithmetics.
*/
- if (mp->m_blocks_per_cluster == 1) {
+ if (M_IGEO(mp)->blocks_per_cluster == 1) {
offset = XFS_INO_TO_OFFSET(mp, ino);
ASSERT(offset < mp->m_sb.sb_inopblock);
@@ -2368,8 +2356,8 @@ xfs_imap(
* find the location. Otherwise we have to do a btree
* lookup to find the location.
*/
- if (mp->m_inoalign_mask) {
- offset_agbno = agbno & mp->m_inoalign_mask;
+ if (M_IGEO(mp)->inoalign_mask) {
+ offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
chunk_agbno = agbno - offset_agbno;
} else {
error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
@@ -2381,13 +2369,13 @@ xfs_imap(
out_map:
ASSERT(agbno >= chunk_agbno);
cluster_agbno = chunk_agbno +
- ((offset_agbno / mp->m_blocks_per_cluster) *
- mp->m_blocks_per_cluster);
+ ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) *
+ M_IGEO(mp)->blocks_per_cluster);
offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
XFS_INO_TO_OFFSET(mp, ino);
imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
- imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+ imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
/*
@@ -2409,20 +2397,6 @@ out_map:
}
/*
- * Compute and fill in value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
- xfs_mount_t *mp) /* file system mount structure */
-{
- uint inodes;
-
- inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
- mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr,
- inodes);
-}
-
-/*
* Log specified fields for the ag hdr (inode section). The growth of the agi
* structure over time requires that we interpret the buffer as two logical
* regions delineated by the end of the unlinked list. This is due to the size
@@ -2493,7 +2467,7 @@ static xfs_failaddr_t
xfs_agi_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
int i;
@@ -2545,7 +2519,7 @@ static void
xfs_agi_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2562,7 +2536,7 @@ static void
xfs_agi_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
xfs_failaddr_t fa;
@@ -2768,3 +2742,110 @@ xfs_ialloc_count_inodes(
*freecount = ci.freecount;
return 0;
}
+
+/*
+ * Initialize inode-related geometry information.
+ *
+ * Compute the inode btree min and max levels and set maxicount.
+ *
+ * Set the inode cluster size. This may still be overridden by the file
+ * system block size if it is larger than the chosen cluster size.
+ *
+ * For v5 filesystems, scale the cluster size with the inode size to keep a
+ * constant ratio of inode per cluster buffer, but only if mkfs has set the
+ * inode alignment value appropriately for larger cluster sizes.
+ *
+ * Then compute the inode cluster alignment information.
+ */
+void
+xfs_ialloc_setup_geometry(
+ struct xfs_mount *mp)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ uint64_t icount;
+ uint inodes;
+
+ /* Compute inode btree geometry. */
+ igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+ igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
+ igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
+ igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
+ igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
+
+ igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
+ sbp->sb_inopblock);
+ igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog;
+
+ if (sbp->sb_spino_align)
+ igeo->ialloc_min_blks = sbp->sb_spino_align;
+ else
+ igeo->ialloc_min_blks = igeo->ialloc_blks;
+
+ /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */
+ inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
+ igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
+ inodes);
+
+ /* Set the maximum inode count for this filesystem. */
+ if (sbp->sb_imax_pct) {
+ /*
+ * Make sure the maximum inode count is a multiple
+ * of the units we allocate inodes in.
+ */
+ icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+ do_div(icount, 100);
+ do_div(icount, igeo->ialloc_blks);
+ igeo->maxicount = XFS_FSB_TO_INO(mp,
+ icount * igeo->ialloc_blks);
+ } else {
+ igeo->maxicount = 0;
+ }
+
+ /*
+ * Compute the desired size of an inode cluster buffer size, which
+ * starts at 8K and (on v5 filesystems) scales up with larger inode
+ * sizes.
+ *
+ * Preserve the desired inode cluster size because the sparse inodes
+ * feature uses that desired size (not the actual size) to compute the
+ * sparse inode alignment. The mount code validates this value, so we
+ * cannot change the behavior.
+ */
+ igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ int new_size = igeo->inode_cluster_size_raw;
+
+ new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
+ if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
+ igeo->inode_cluster_size_raw = new_size;
+ }
+
+ /* Calculate inode cluster ratios. */
+ if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize)
+ igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp,
+ igeo->inode_cluster_size_raw);
+ else
+ igeo->blocks_per_cluster = 1;
+ igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster);
+ igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);
+
+ /* Calculate inode cluster alignment. */
+ if (xfs_sb_version_hasalign(&mp->m_sb) &&
+ mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
+ igeo->cluster_align = mp->m_sb.sb_inoalignmt;
+ else
+ igeo->cluster_align = 1;
+ igeo->inoalign_mask = igeo->cluster_align - 1;
+ igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align);
+
+ /*
+ * If we are using stripe alignment, check whether
+ * the stripe unit is a multiple of the inode alignment
+ */
+ if (mp->m_dalign && igeo->inoalign_mask &&
+ !(mp->m_dalign & igeo->inoalign_mask))
+ igeo->ialloc_align = mp->m_dalign;
+ else
+ igeo->ialloc_align = 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index e936b7cc9389..323592d563d5 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -23,16 +23,6 @@ struct xfs_icluster {
* sparse chunks */
};
-/* Calculate and return the number of filesystem blocks per inode cluster */
-static inline int
-xfs_icluster_size_fsb(
- struct xfs_mount *mp)
-{
- if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size)
- return 1;
- return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
-}
-
/*
* Make an inode pointer out of the buffer/offset.
*/
@@ -96,13 +86,6 @@ xfs_imap(
uint flags); /* flags for inode btree lookup */
/*
- * Compute and fill in value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
- struct xfs_mount *mp); /* file system mount structure */
-
-/*
* Log specified fields for the ag hdr (inode section)
*/
void
@@ -168,5 +151,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
int *stat);
int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
+void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index bc2dfacd2f4a..b82992f795aa 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -11,14 +11,12 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_rmap.h"
@@ -28,7 +26,7 @@ xfs_inobt_get_minrecs(
struct xfs_btree_cur *cur,
int level)
{
- return cur->bc_mp->m_inobt_mnr[level != 0];
+ return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0];
}
STATIC struct xfs_btree_cur *
@@ -164,7 +162,7 @@ xfs_inobt_get_maxrecs(
struct xfs_btree_cur *cur,
int level)
{
- return cur->bc_mp->m_inobt_mxr[level != 0];
+ return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0];
}
STATIC void
@@ -255,7 +253,7 @@ static xfs_failaddr_t
xfs_inobt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
xfs_failaddr_t fa;
unsigned int level;
@@ -281,10 +279,11 @@ xfs_inobt_verify(
/* level verification */
level = be16_to_cpu(block->bb_level);
- if (level >= mp->m_in_maxlevels)
+ if (level >= M_IGEO(mp)->inobt_maxlevels)
return __this_address;
- return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
+ return xfs_btree_sblock_verify(bp,
+ M_IGEO(mp)->inobt_mxr[level != 0]);
}
static void
@@ -546,7 +545,7 @@ xfs_inobt_max_size(
xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno);
/* Bail out if we're uninitialized, which can happen in mkfs. */
- if (mp->m_inobt_mxr[0] == 0)
+ if (M_IGEO(mp)->inobt_mxr[0] == 0)
return 0;
/*
@@ -558,11 +557,41 @@ xfs_inobt_max_size(
XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno)
agblocks -= mp->m_sb.sb_logblocks;
- return xfs_btree_calc_size(mp->m_inobt_mnr,
+ return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr,
(uint64_t)agblocks * mp->m_sb.sb_inopblock /
XFS_INODES_PER_CHUNK);
}
+/* Read AGI and create inobt cursor. */
+int
+xfs_inobt_cur(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ xfs_btnum_t which,
+ struct xfs_btree_cur **curpp,
+ struct xfs_buf **agi_bpp)
+{
+ struct xfs_btree_cur *cur;
+ int error;
+
+ ASSERT(*agi_bpp == NULL);
+ ASSERT(*curpp == NULL);
+
+ error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp);
+ if (error)
+ return error;
+
+ cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which);
+ if (!cur) {
+ xfs_trans_brelse(tp, *agi_bpp);
+ *agi_bpp = NULL;
+ return -ENOMEM;
+ }
+ *curpp = cur;
+ return 0;
+}
+
static int
xfs_inobt_count_blocks(
struct xfs_mount *mp,
@@ -571,15 +600,14 @@ xfs_inobt_count_blocks(
xfs_btnum_t btnum,
xfs_extlen_t *tree_blocks)
{
- struct xfs_buf *agbp;
- struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_btree_cur *cur = NULL;
int error;
- error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp);
if (error)
return error;
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
error = xfs_btree_count_blocks(cur, tree_blocks);
xfs_btree_del_cursor(cur, error);
xfs_trans_brelse(tp, agbp);
@@ -619,5 +647,5 @@ xfs_iallocbt_calc_size(
struct xfs_mount *mp,
unsigned long long len)
{
- return xfs_btree_calc_size(mp->m_inobt_mnr, len);
+ return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len);
}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index ebdd0c6b8766..951305ecaae1 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -64,5 +64,8 @@ int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
+int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, xfs_btnum_t btnum,
+ struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp);
#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index bc690f2409fa..27aa3f2bc4bc 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -3,18 +3,14 @@
* Copyright (c) 2017 Christoph Hellwig.
*/
-#include <linux/cache.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_bmap.h"
#include "xfs_trace.h"
/*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index e021d5133ccb..28ab3c5255e1 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -10,11 +10,9 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_cksum.h"
#include "xfs_icache.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
@@ -33,12 +31,9 @@ xfs_inobp_check(
xfs_buf_t *bp)
{
int i;
- int j;
xfs_dinode_t *dip;
- j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-
- for (i = 0; i < j; i++) {
+ for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
if (!dip->di_next_unlinked) {
xfs_alert(mp,
@@ -80,7 +75,7 @@ xfs_inode_buf_verify(
struct xfs_buf *bp,
bool readahead)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_agnumber_t agno;
int i;
int ni;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index f9acf1d436f6..bf3e04018246 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -3,10 +3,10 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/log2.h>
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -19,12 +19,10 @@
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_attr_sf.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_leaf.h"
-#include "xfs_shared.h"
kmem_zone_t *xfs_ifork_zone;
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index 1b542ec11d5d..7f55eb3f3653 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -12,9 +12,7 @@
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_trans_space.h"
-#include "xfs_inode.h"
#include "xfs_da_btree.h"
-#include "xfs_attr_leaf.h"
#include "xfs_bmap_btree.h"
/*
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 542aa1475b5f..51bb9bdb0e84 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -9,7 +9,6 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
@@ -19,7 +18,6 @@
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 5d9de9b21726..38529dbacd55 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -12,12 +12,10 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
-#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_rmap.h"
@@ -203,7 +201,7 @@ STATIC xfs_failaddr_t
xfs_refcountbt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 8ed885507dd8..e6aeb390b2fb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -10,24 +10,17 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_extent_busy.h"
-#include "xfs_bmap.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
/*
* Lookup the first record less than or equal to [bno, len, owner, offset]
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 5d1f8884c888..fc78efa52c94 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -9,18 +9,14 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_ag_resv.h"
@@ -292,7 +288,7 @@ static xfs_failaddr_t
xfs_rmapbt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index eaaff67e9626..8ea1efc97b41 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -13,15 +13,7 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
-#include "xfs_icache.h"
#include "xfs_rtalloc.h"
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index e76a3e5d28d7..a08dd8f40346 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -10,26 +10,19 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_log.h"
#include "xfs_rmap_btree.h"
-#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_health.h"
/*
@@ -686,7 +679,7 @@ xfs_sb_read_verify(
struct xfs_buf *bp)
{
struct xfs_sb sb;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
int error;
@@ -752,7 +745,7 @@ xfs_sb_write_verify(
struct xfs_buf *bp)
{
struct xfs_sb sb;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
int error;
@@ -800,12 +793,14 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
*
* Mount initialization code establishing various mount
* fields from the superblock associated with the given
- * mount structure
+ * mount structure.
+ *
+ * Inode geometry are calculated in xfs_ialloc_setup_geometry.
*/
void
xfs_sb_mount_common(
- struct xfs_mount *mp,
- struct xfs_sb *sbp)
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp)
{
mp->m_agfrotor = mp->m_agirotor = 0;
mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -813,7 +808,6 @@ xfs_sb_mount_common(
mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
- mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
mp->m_blockmask = sbp->sb_blocksize - 1;
mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
mp->m_blockwmask = mp->m_blockwsize - 1;
@@ -823,11 +817,6 @@ xfs_sb_mount_common(
mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
- mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
- mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
- mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
- mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
-
mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
@@ -844,14 +833,6 @@ xfs_sb_mount_common(
mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
- mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
- sbp->sb_inopblock);
- mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
-
- if (sbp->sb_spino_align)
- mp->m_ialloc_min_blks = sbp->sb_spino_align;
- else
- mp->m_ialloc_min_blks = mp->m_ialloc_blks;
mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
}
@@ -939,7 +920,7 @@ xfs_log_sb(
struct xfs_trans *tp)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
+ struct xfs_buf *bp = xfs_trans_getsb(tp, mp);
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
@@ -1005,7 +986,7 @@ xfs_update_secondary_sbs(
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
- XFS_FSS_TO_BB(mp, 1), 0);
+ XFS_FSS_TO_BB(mp, 1));
/*
* If we get an error reading or writing alternate superblocks,
* continue. xfs_repair chooses the "best" superblock based
@@ -1069,7 +1050,7 @@ xfs_sync_sb_buf(
if (error)
return error;
- bp = xfs_trans_getsb(tp, mp, 0);
+ bp = xfs_trans_getsb(tp, mp);
xfs_log_sb(tp);
xfs_trans_bhold(tp, bp);
xfs_trans_set_sync(tp);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 4e909791aeac..e0641b7337b3 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -65,7 +65,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
#define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */
-#define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */
/*
* LOWMODE is used by the allocator to activate the lowspace algorithm - when
* free space is running low the extent allocator may choose to allocate an
@@ -136,4 +135,52 @@ void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_inode *ip, struct xfs_ifork *ifp);
xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
+/* Computed inode geometry for the filesystem. */
+struct xfs_ino_geometry {
+ /* Maximum inode count in this filesystem. */
+ uint64_t maxicount;
+
+ /* Actual inode cluster buffer size, in bytes. */
+ unsigned int inode_cluster_size;
+
+ /*
+ * Desired inode cluster buffer size, in bytes. This value is not
+ * rounded up to at least one filesystem block, which is necessary for
+ * the sole purpose of validating sb_spino_align. Runtime code must
+ * only ever use inode_cluster_size.
+ */
+ unsigned int inode_cluster_size_raw;
+
+ /* Inode cluster sizes, adjusted to be at least 1 fsb. */
+ unsigned int inodes_per_cluster;
+ unsigned int blocks_per_cluster;
+
+ /* Inode cluster alignment. */
+ unsigned int cluster_align;
+ unsigned int cluster_align_inodes;
+ unsigned int inoalign_mask; /* mask sb_inoalignmt if used */
+
+ unsigned int inobt_mxr[2]; /* max inobt btree records */
+ unsigned int inobt_mnr[2]; /* min inobt btree records */
+ unsigned int inobt_maxlevels; /* max inobt btree levels. */
+
+ /* Size of inode allocations under normal operation. */
+ unsigned int ialloc_inos;
+ unsigned int ialloc_blks;
+
+ /* Minimum inode blocks for a sparse allocation. */
+ unsigned int ialloc_min_blks;
+
+ /* stripe unit inode alignment */
+ unsigned int ialloc_align;
+
+ unsigned int agino_log; /* #bits for agino in inum */
+};
+
+/* Keep iterating the data structure. */
+#define XFS_ITER_CONTINUE (0)
+
+/* Stop iterating the data structure. */
+#define XFS_ITER_ABORT (1)
+
#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index a0ccc253c43d..3b8260ca7d1b 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -11,12 +11,8 @@
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_symlink.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
@@ -90,7 +86,7 @@ static xfs_failaddr_t
xfs_symlink_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -116,7 +112,7 @@ static void
xfs_symlink_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
/* no verification of non-crc buffers */
@@ -136,7 +132,7 @@ static void
xfs_symlink_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 83f4ee2afc49..d12bbd526e7c 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -15,12 +15,10 @@
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_trans_space.h"
-#include "xfs_trace.h"
#define _ALLOC true
#define _FREE false
@@ -136,9 +134,10 @@ STATIC uint
xfs_calc_inobt_res(
struct xfs_mount *mp)
{
- return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
+ return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
}
/*
@@ -167,7 +166,7 @@ xfs_calc_finobt_res(
* includes:
*
* the allocation btrees: 2 trees * (max depth - 1) * block size
- * the inode chunk: m_ialloc_blks * N
+ * the inode chunk: m_ino_geo.ialloc_blks * N
*
* The size N of the inode chunk reservation depends on whether it is for
* allocation or free and which type of create transaction is in use. An inode
@@ -193,7 +192,7 @@ xfs_calc_inode_chunk_res(
size = XFS_FSB_TO_B(mp, 1);
}
- res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
+ res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
return res;
}
@@ -307,7 +306,7 @@ xfs_calc_iunlink_remove_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+ 2 * M_IGEO(mp)->inode_cluster_size;
}
/*
@@ -345,7 +344,7 @@ STATIC uint
xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+ M_IGEO(mp)->inode_cluster_size;
}
/*
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index a62fb950bef1..88221c7a04cc 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -56,9 +56,9 @@
#define XFS_DIRREMOVE_SPACE_RES(mp) \
XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
#define XFS_IALLOC_SPACE_RES(mp) \
- ((mp)->m_ialloc_blks + \
+ (M_IGEO(mp)->ialloc_blks + \
(xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
- ((mp)->m_in_maxlevels - 1)))
+ (M_IGEO(mp)->inobt_maxlevels - 1)))
/*
* Space reservation values for various transactions.
@@ -94,7 +94,8 @@
#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
#define XFS_IFREE_SPACE_RES(mp) \
- (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0)
+ (xfs_sb_version_hasfinobt(&mp->m_sb) ? \
+ M_IGEO(mp)->inobt_maxlevels : 0)
#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index d51acc95bc00..4f595546a639 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -7,19 +7,10 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
/* Find the size of the AG, in blocks. */
xfs_agblock_t
@@ -87,14 +78,14 @@ xfs_agino_range(
* Calculate the first inode, which will be in the first
* cluster-aligned block after the AGFL.
*/
- bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align);
+ bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align);
*first = XFS_AGB_TO_AGINO(mp, bno);
/*
* Calculate the last inode, which will be at the end of the
* last (aligned) cluster that can be allocated in the AG.
*/
- bno = round_down(eoag, mp->m_cluster_align);
+ bno = round_down(eoag, M_IGEO(mp)->cluster_align);
*last = XFS_AGB_TO_AGINO(mp, bno) - 1;
}
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index adaeabdefdd3..16b09b941441 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -9,20 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Superblock */
@@ -646,7 +639,7 @@ xchk_agfl_block(
xchk_agfl_block_xref(sc, agbno);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return XFS_ITER_ABORT;
return 0;
}
@@ -737,7 +730,7 @@ xchk_agfl(
/* Check the blocks in the AGFL. */
error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
sc->sa.agfl_bp, xchk_agfl_block, &sai);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ if (error == XFS_ITER_ABORT) {
error = 0;
goto out_free;
}
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 64e31f87d490..7a1a38b636a9 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -9,22 +9,17 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 44883e9112ad..a43d1813c4ff 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -9,19 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub free space btrees.
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index dce74ec57038..1afc58bf71dd 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -9,26 +9,62 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
-#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/dabtree.h"
-#include "scrub/trace.h"
+#include "scrub/attr.h"
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
+/*
+ * Allocate enough memory to hold an attr value and attr block bitmaps,
+ * reallocating the buffer if necessary. Buffer contents are not preserved
+ * across a reallocation.
+ */
+int
+xchk_setup_xattr_buf(
+ struct xfs_scrub *sc,
+ size_t value_size,
+ xfs_km_flags_t flags)
+{
+ size_t sz;
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ /*
+ * We need enough space to read an xattr value from the file or enough
+ * space to hold three copies of the xattr free space bitmap. We don't
+ * need the buffer space for both purposes at the same time.
+ */
+ sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+ sz = max_t(size_t, sz, value_size);
+
+ /*
+ * If there's already a buffer, figure out if we need to reallocate it
+ * to accommodate a larger size.
+ */
+ if (ab) {
+ if (sz <= ab->sz)
+ return 0;
+ kmem_free(ab);
+ sc->buf = NULL;
+ }
+
+ /*
+ * Don't zero the buffer upon allocation to avoid runtime overhead.
+ * All users must be careful never to read uninitialized contents.
+ */
+ ab = kmem_alloc_large(sizeof(*ab) + sz, flags);
+ if (!ab)
+ return -ENOMEM;
+
+ ab->sz = sz;
+ sc->buf = ab;
+ return 0;
+}
/* Set us up to scrub an inode's extended attributes. */
int
@@ -36,19 +72,18 @@ xchk_setup_xattr(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- size_t sz;
+ int error;
/*
- * Allocate the buffer without the inode lock held. We need enough
- * space to read every xattr value in the file or enough space to
- * hold three copies of the xattr free space bitmap. (Not both at
- * the same time.)
+ * We failed to get memory while checking attrs, so this time try to
+ * get all the memory we're ever going to need. Allocate the buffer
+ * without the inode lock held, which means we can sleep.
*/
- sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) *
- BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
- sc->buf = kmem_zalloc_large(sz, KM_SLEEP);
- if (!sc->buf)
- return -ENOMEM;
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP);
+ if (error)
+ return error;
+ }
return xchk_setup_inode_contents(sc, ip, 0);
}
@@ -83,7 +118,7 @@ xchk_xattr_listent(
sx = container_of(context, struct xchk_xattr, context);
if (xchk_should_terminate(sx->sc, &error)) {
- context->seen_enough = 1;
+ context->seen_enough = error;
return;
}
@@ -99,6 +134,19 @@ xchk_xattr_listent(
return;
}
+ /*
+ * Try to allocate enough memory to extrat the attr value. If that
+ * doesn't work, we overload the seen_enough variable to convey
+ * the error message back to the main scrub function.
+ */
+ error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL);
+ if (error == -ENOMEM)
+ error = -EDEADLOCK;
+ if (error) {
+ context->seen_enough = error;
+ return;
+ }
+
args.flags = ATTR_KERNOTIME;
if (flags & XFS_ATTR_ROOT)
args.flags |= ATTR_ROOT;
@@ -111,8 +159,8 @@ xchk_xattr_listent(
args.namelen = namelen;
args.hashval = xfs_da_hashname(args.name, args.namelen);
args.trans = context->tp;
- args.value = sx->sc->buf;
- args.valuelen = XATTR_SIZE_MAX;
+ args.value = xchk_xattr_valuebuf(sx->sc);
+ args.valuelen = valuelen;
error = xfs_attr_get_ilocked(context->dp, &args);
if (error == -EEXIST)
@@ -125,7 +173,7 @@ xchk_xattr_listent(
args.blkno);
fail_xref:
if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- context->seen_enough = 1;
+ context->seen_enough = XFS_ITER_ABORT;
return;
}
@@ -170,13 +218,12 @@ xchk_xattr_check_freemap(
unsigned long *map,
struct xfs_attr3_icleaf_hdr *leafhdr)
{
- unsigned long *freemap;
- unsigned long *dstmap;
+ unsigned long *freemap = xchk_xattr_freemap(sc);
+ unsigned long *dstmap = xchk_xattr_dstmap(sc);
unsigned int mapsize = sc->mp->m_attr_geo->blksize;
int i;
/* Construct bitmap of freemap contents. */
- freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
bitmap_zero(freemap, mapsize);
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (!xchk_xattr_set_map(sc, freemap,
@@ -186,7 +233,6 @@ xchk_xattr_check_freemap(
}
/* Look for bits that are set in freemap and are marked in use. */
- dstmap = freemap + BITS_TO_LONGS(mapsize);
return bitmap_and(dstmap, freemap, map, mapsize) == 0;
}
@@ -201,13 +247,13 @@ xchk_xattr_entry(
char *buf_end,
struct xfs_attr_leafblock *leaf,
struct xfs_attr3_icleaf_hdr *leafhdr,
- unsigned long *usedmap,
struct xfs_attr_leaf_entry *ent,
int idx,
unsigned int *usedbytes,
__u32 *last_hashval)
{
struct xfs_mount *mp = ds->state->mp;
+ unsigned long *usedmap = xchk_xattr_usedmap(ds->sc);
char *name_end;
struct xfs_attr_leaf_name_local *lentry;
struct xfs_attr_leaf_name_remote *rentry;
@@ -267,16 +313,26 @@ xchk_xattr_block(
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_attr_leaf_entry *ent;
struct xfs_attr_leaf_entry *entries;
- unsigned long *usedmap = ds->sc->buf;
+ unsigned long *usedmap;
char *buf_end;
size_t off;
__u32 last_hashval = 0;
unsigned int usedbytes = 0;
unsigned int hdrsize;
int i;
+ int error;
if (*last_checked == blk->blkno)
return 0;
+
+ /* Allocate memory for block usage checking. */
+ error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL);
+ if (error == -ENOMEM)
+ return -EDEADLOCK;
+ if (error)
+ return error;
+ usedmap = xchk_xattr_usedmap(ds->sc);
+
*last_checked = blk->blkno;
bitmap_zero(usedmap, mp->m_attr_geo->blksize);
@@ -324,7 +380,7 @@ xchk_xattr_block(
/* Check the entry and nameval. */
xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
- usedmap, ent, i, &usedbytes, &last_hashval);
+ ent, i, &usedbytes, &last_hashval);
if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out;
@@ -464,6 +520,10 @@ xchk_xattr(
error = xfs_attr_list_int_ilocked(&sx.context);
if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
goto out;
+
+ /* Did our listent function try to return any errors? */
+ if (sx.context.seen_enough < 0)
+ error = sx.context.seen_enough;
out:
return error;
}
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
new file mode 100644
index 000000000000..13a1d2e8424d
--- /dev/null
+++ b/fs/xfs/scrub/attr.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_ATTR_H__
+#define __XFS_SCRUB_ATTR_H__
+
+/*
+ * Temporary storage for online scrub and repair of extended attributes.
+ */
+struct xchk_xattr_buf {
+ /* Size of @buf, in bytes. */
+ size_t sz;
+
+ /*
+ * Memory buffer -- either used for extracting attr values while
+ * walking the attributes; or for computing attr block bitmaps when
+ * checking the attribute tree.
+ *
+ * Each bitmap contains enough bits to track every byte in an attr
+ * block (rounded up to the size of an unsigned long). The attr block
+ * used space bitmap starts at the beginning of the buffer; the free
+ * space bitmap follows immediately after; and we have a third buffer
+ * for storing intermediate bitmap results.
+ */
+ uint8_t buf[0];
+};
+
+/* A place to store attribute values. */
+static inline uint8_t *
+xchk_xattr_valuebuf(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ return ab->buf;
+}
+
+/* A bitmap of space usage computed by walking an attr leaf block. */
+static inline unsigned long *
+xchk_xattr_usedmap(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ return (unsigned long *)ab->buf;
+}
+
+/* A bitmap of free space computed by walking attr leaf block free info. */
+static inline unsigned long *
+xchk_xattr_freemap(
+ struct xfs_scrub *sc)
+{
+ return xchk_xattr_usedmap(sc) +
+ BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+/* A bitmap used to hold temporary results. */
+static inline unsigned long *
+xchk_xattr_dstmap(
+ struct xfs_scrub *sc)
+{
+ return xchk_xattr_freemap(sc) +
+ BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size,
+ xfs_km_flags_t flags);
+
+#endif /* __XFS_SCRUB_ATTR_H__ */
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index fdadc9e1dc49..3d47d111be5a 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -10,11 +10,6 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
-#include "scrub/xfs_scrub.h"
-#include "scrub/scrub.h"
-#include "scrub/common.h"
-#include "scrub/trace.h"
-#include "scrub/repair.h"
#include "scrub/bitmap.h"
/*
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index a703cd58a90e..1bd29fdc2ab5 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -9,27 +9,19 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/* Set us up with an inode's bmap. */
int
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 117910db51b8..f52a7b8256f9 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -9,14 +9,7 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 973aa59975e3..18876056e5e0 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -9,22 +9,16 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
-#include "xfs_itable.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
@@ -32,11 +26,9 @@
#include "xfs_trans_priv.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
-#include "scrub/btree.h"
#include "scrub/repair.h"
#include "scrub/health.h"
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 90527b094878..94c4f1de1922 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -9,20 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index a38a22785a1a..1e2e11721eb9 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -9,24 +9,14 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
#include "scrub/dabtree.h"
/* Set us up to scrub directories. */
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 07c11e3e6437..fc3f510c9034 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -9,22 +9,10 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
-#include "xfs_rmap.h"
-#include "xfs_error.h"
-#include "xfs_errortag.h"
-#include "xfs_icache.h"
#include "xfs_health.h"
-#include "xfs_bmap.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 23cf8e2f25db..b2f602811e9d 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -7,18 +7,10 @@
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_health.h"
#include "scrub/scrub.h"
-#include "scrub/health.h"
/*
* Scrub and In-Core Filesystem Health Assessments
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 9b47117180cb..681758704fda 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -9,21 +9,14 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
@@ -230,7 +223,7 @@ xchk_iallocbt_check_cluster(
int error = 0;
nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
- mp->m_inodes_per_cluster);
+ M_IGEO(mp)->inodes_per_cluster);
/* Map this inode cluster */
agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);
@@ -251,7 +244,7 @@ xchk_iallocbt_check_cluster(
*/
ir_holemask = (irec->ir_holemask & cluster_mask);
imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
- imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+ imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
mp->m_sb.sb_inodelog;
@@ -276,12 +269,12 @@ xchk_iallocbt_check_cluster(
/* If any part of this is a hole, skip it. */
if (ir_holemask) {
xchk_xref_is_not_owned_by(bs->sc, agbno,
- mp->m_blocks_per_cluster,
+ M_IGEO(mp)->blocks_per_cluster,
&XFS_RMAP_OINFO_INODES);
return 0;
}
- xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster,
+ xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster,
&XFS_RMAP_OINFO_INODES);
/* Grab the inode cluster buffer. */
@@ -333,7 +326,7 @@ xchk_iallocbt_check_clusters(
*/
for (cluster_base = 0;
cluster_base < XFS_INODES_PER_CHUNK;
- cluster_base += bs->sc->mp->m_inodes_per_cluster) {
+ cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) {
error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
if (error)
break;
@@ -355,6 +348,7 @@ xchk_iallocbt_rec_alignment(
{
struct xfs_mount *mp = bs->sc->mp;
struct xchk_iallocbt *iabt = bs->private;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
/*
* finobt records have different positioning requirements than inobt
@@ -372,7 +366,7 @@ xchk_iallocbt_rec_alignment(
unsigned int imask;
imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
- mp->m_cluster_align_inodes) - 1;
+ igeo->cluster_align_inodes) - 1;
if (irec->ir_startino & imask)
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return;
@@ -400,17 +394,17 @@ xchk_iallocbt_rec_alignment(
}
/* inobt records must be aligned to cluster and inoalignmnt size. */
- if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) {
+ if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return;
}
- if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) {
+ if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return;
}
- if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK)
+ if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK)
return;
/*
@@ -419,7 +413,7 @@ xchk_iallocbt_rec_alignment(
* after this one.
*/
iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
- iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster;
+ iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster;
}
/* Scrub an inobt/finobt record. */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index e213efc194a1..6d483ab29e63 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -9,27 +9,17 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_inode_buf.h"
-#include "xfs_inode_fork.h"
#include "xfs_ialloc.h"
#include "xfs_da_format.h"
#include "xfs_reflink.h"
#include "xfs_rmap.h"
-#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Grab total control of the inode metadata. It doesn't matter here if
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index d5d197f1b80f..c962bd534690 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -9,21 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up to scrub parents. */
int
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 5dfe2b5924db..0a33b4421c32 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -9,24 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
-#include "xfs_dquot.h"
-#include "xfs_dquot_item.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Convert a scrub type code to a DQ flag, or return 0 if error. */
static inline uint
@@ -144,7 +133,7 @@ xchk_quota_item(
if (bsoft > bhard)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
- if (ihard > mp->m_maxicount)
+ if (ihard > M_IGEO(mp)->maxicount)
xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
if (isoft > ihard)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 708b4158eb90..93b3793bc5b3 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -7,22 +7,12 @@
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub reference count btrees.
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index eb358f0f5e0a..4cfeec57fb05 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -9,29 +9,21 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_extent_busy.h"
#include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
#include "xfs_quota.h"
-#include "xfs_attr.h"
-#include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -357,7 +349,7 @@ xrep_init_btblock(
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
XFS_FSB_TO_BB(mp, 1), 0);
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
- xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
+ xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
xfs_trans_log_buf(tp, bp, 0, bp->b_length);
bp->b_ops = ops;
@@ -672,7 +664,7 @@ xrep_findroot_agfl_walk(
{
xfs_agblock_t *agbno = priv;
- return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
+ return (*agbno == bno) ? XFS_ITER_ABORT : 0;
}
/* Does this block match the btree information passed in? */
@@ -702,7 +694,7 @@ xrep_findroot_block(
if (owner == XFS_RMAP_OWN_AG) {
error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
xrep_findroot_agfl_walk, &agbno);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error == XFS_ITER_ABORT)
return 0;
if (error)
return error;
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 92a140c5b55e..8d4cefd761c1 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -9,21 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub reverse mapping btrees.
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index dbe115b075f7..c642bc206c41 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -9,19 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up with the realtime metadata locked. */
int
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index f630389ee176..15c8c5f3f688 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -9,36 +9,16 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
-#include "xfs_refcount_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_rmap_btree.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
-#include "scrub/btree.h"
#include "scrub/repair.h"
#include "scrub/health.h"
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index f7ebaa946999..99c0b1234c3c 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -9,19 +9,11 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
#include "xfs_symlink.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up to scrub a symbolic link. */
int
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 96feaf8dcdec..9eaab2eb5ed3 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -10,15 +10,9 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
-#include "scrub/common.h"
/* Figure out which block the btree cursor was pointing to. */
static inline xfs_fsblock_t
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8039e35147dd..cbda40d40326 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -4,16 +4,14 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11f703d4a605..f16d5f196c6b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -12,16 +12,11 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
-#include <linux/writeback.h>
/*
* structure owned by writepages passed to individual writepage calls
@@ -138,8 +133,7 @@ xfs_setfilesize_trans_alloc(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
- XFS_TRANS_NOFS, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
if (error)
return error;
@@ -240,9 +234,17 @@ xfs_end_ioend(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
+ unsigned int nofs_flag;
int error;
/*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
+
+ /*
* Just clean up the in-memory strutures if the fs has been shut down.
*/
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -282,6 +284,8 @@ done:
list_del_init(&ioend->io_list);
xfs_destroy_ioend(ioend, error);
}
+
+ memalloc_nofs_restore(nofs_flag);
}
/*
@@ -290,13 +294,9 @@ done:
static bool
xfs_ioend_can_merge(
struct xfs_ioend *ioend,
- int ioend_error,
struct xfs_ioend *next)
{
- int next_error;
-
- next_error = blk_status_to_errno(next->io_bio->bi_status);
- if (ioend_error != next_error)
+ if (ioend->io_bio->bi_status != next->io_bio->bi_status)
return false;
if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK))
return false;
@@ -305,11 +305,28 @@ xfs_ioend_can_merge(
return false;
if (ioend->io_offset + ioend->io_size != next->io_offset)
return false;
- if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next))
- return false;
return true;
}
+/*
+ * If the to be merged ioend has a preallocated transaction for file
+ * size updates we need to ensure the ioend it is merged into also
+ * has one. If it already has one we can simply cancel the transaction
+ * as it is guaranteed to be clean.
+ */
+static void
+xfs_ioend_merge_append_transactions(
+ struct xfs_ioend *ioend,
+ struct xfs_ioend *next)
+{
+ if (!ioend->io_append_trans) {
+ ioend->io_append_trans = next->io_append_trans;
+ next->io_append_trans = NULL;
+ } else {
+ xfs_setfilesize_ioend(next, -ECANCELED);
+ }
+}
+
/* Try to merge adjacent completions. */
STATIC void
xfs_ioend_try_merge(
@@ -317,25 +334,16 @@ xfs_ioend_try_merge(
struct list_head *more_ioends)
{
struct xfs_ioend *next_ioend;
- int ioend_error;
- int error;
-
- if (list_empty(more_ioends))
- return;
-
- ioend_error = blk_status_to_errno(ioend->io_bio->bi_status);
while (!list_empty(more_ioends)) {
next_ioend = list_first_entry(more_ioends, struct xfs_ioend,
io_list);
- if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend))
+ if (!xfs_ioend_can_merge(ioend, next_ioend))
break;
list_move_tail(&next_ioend->io_list, &ioend->io_list);
ioend->io_size += next_ioend->io_size;
- if (ioend->io_append_trans) {
- error = xfs_setfilesize_ioend(next_ioend, 1);
- ASSERT(error == 1);
- }
+ if (next_ioend->io_append_trans)
+ xfs_ioend_merge_append_transactions(ioend, next_ioend);
}
}
@@ -626,7 +634,7 @@ allocate_blocks:
* reference to the ioend to ensure that the ioend completion is only done once
* all bios have been submitted and the ioend is really done.
*
- * If @fail is non-zero, it means that we have a situation where some part of
+ * If @status is non-zero, it means that we have a situation where some part of
* the submission process has failed after we have marked paged for writeback
* and unlocked them. In this situation, we need to fail the bio and ioend
* rather than submit it to IO. This typically only happens on a filesystem
@@ -638,21 +646,19 @@ xfs_submit_ioend(
struct xfs_ioend *ioend,
int status)
{
+ unsigned int nofs_flag;
+
+ /*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
+
/* Convert CoW extents to regular */
if (!status && ioend->io_fork == XFS_COW_FORK) {
- /*
- * Yuk. This can do memory allocation, but is not a
- * transactional operation so everything is done in GFP_KERNEL
- * context. That can deadlock, because we hold pages in
- * writeback state and GFP_KERNEL allocations can block on them.
- * Hence we must operate in nofs conditions here.
- */
- unsigned nofs_flag;
-
- nofs_flag = memalloc_nofs_save();
status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
ioend->io_offset, ioend->io_size);
- memalloc_nofs_restore(nofs_flag);
}
/* Reserve log space if we might write beyond the on-disk inode size. */
@@ -663,9 +669,10 @@ xfs_submit_ioend(
!ioend->io_append_trans)
status = xfs_setfilesize_trans_alloc(ioend);
+ memalloc_nofs_restore(nofs_flag);
+
ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = xfs_end_bio;
- ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
/*
* If we are failing the IO now, just mark the ioend with an
@@ -679,7 +686,6 @@ xfs_submit_ioend(
return status;
}
- ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
return 0;
}
@@ -691,7 +697,8 @@ xfs_alloc_ioend(
xfs_exntst_t state,
xfs_off_t offset,
struct block_device *bdev,
- sector_t sector)
+ sector_t sector,
+ struct writeback_control *wbc)
{
struct xfs_ioend *ioend;
struct bio *bio;
@@ -699,6 +706,9 @@ xfs_alloc_ioend(
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = sector;
+ bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ bio->bi_write_hint = inode->i_write_hint;
+ wbc_init_bio(wbc, bio);
ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
INIT_LIST_HEAD(&ioend->io_list);
@@ -719,24 +729,22 @@ xfs_alloc_ioend(
* so that the bi_private linkage is set up in the right direction for the
* traversal in xfs_destroy_ioend().
*/
-static void
+static struct bio *
xfs_chain_bio(
- struct xfs_ioend *ioend,
- struct writeback_control *wbc,
- struct block_device *bdev,
- sector_t sector)
+ struct bio *prev)
{
struct bio *new;
new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
- bio_set_dev(new, bdev);
- new->bi_iter.bi_sector = sector;
- bio_chain(ioend->io_bio, new);
- bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
- ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
- ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
- submit_bio(ioend->io_bio);
- ioend->io_bio = new;
+ bio_copy_dev(new, prev);/* also copies over blkcg information */
+ new->bi_iter.bi_sector = bio_end_sector(prev);
+ new->bi_opf = prev->bi_opf;
+ new->bi_write_hint = prev->bi_write_hint;
+
+ bio_chain(prev, new);
+ bio_get(prev); /* for xfs_destroy_ioend */
+ submit_bio(prev);
+ return new;
}
/*
@@ -772,7 +780,7 @@ xfs_add_to_ioend(
if (wpc->ioend)
list_add(&wpc->ioend->io_list, iolist);
wpc->ioend = xfs_alloc_ioend(inode, wpc->fork,
- wpc->imap.br_state, offset, bdev, sector);
+ wpc->imap.br_state, offset, bdev, sector, wbc);
}
merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
@@ -783,11 +791,12 @@ xfs_add_to_ioend(
if (!merged) {
if (bio_full(wpc->ioend->io_bio, len))
- xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
+ wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio);
bio_add_page(wpc->ioend->io_bio, page, len, poff);
}
wpc->ioend->io_size += len;
+ wbc_account_cgroup_owner(wbc, page, len);
}
STATIC void
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f62b03186c62..45a1ea240cbb 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -28,7 +28,6 @@ extern const struct address_space_operations xfs_dax_aops;
int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
-extern void xfs_count_page_state(struct page *, int *, int *);
extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *);
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 228821b2ebe0..dc93c51c17de 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -15,18 +15,13 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_attr_remote.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
-#include "xfs_error.h"
#include "xfs_quota.h"
-#include "xfs_trace.h"
#include "xfs_dir2.h"
-#include "xfs_defer.h"
/*
* Look at all the extents for this logical region,
@@ -121,7 +116,7 @@ xfs_attr3_leaf_inactive(
int size;
int tmp;
int i;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 3d213a7394c5..58fc820a70c6 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -6,25 +6,20 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_sf.h"
-#include "xfs_attr_remote.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
#include "xfs_dir2.h"
STATIC int
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
new file mode 100644
index 000000000000..e2148f2d5d6b
--- /dev/null
+++ b/fs/xfs/xfs_bio_io.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ */
+#include "xfs.h"
+
+static inline unsigned int bio_max_vecs(unsigned int count)
+{
+ return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
+}
+
+int
+xfs_rw_bdev(
+ struct block_device *bdev,
+ sector_t sector,
+ unsigned int count,
+ char *data,
+ unsigned int op)
+
+{
+ unsigned int is_vmalloc = is_vmalloc_addr(data);
+ unsigned int left = count;
+ int error;
+ struct bio *bio;
+
+ if (is_vmalloc && op == REQ_OP_WRITE)
+ flush_kernel_vmap_range(data, count);
+
+ bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
+ bio_set_dev(bio, bdev);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_opf = op | REQ_META | REQ_SYNC;
+
+ do {
+ struct page *page = kmem_to_page(data);
+ unsigned int off = offset_in_page(data);
+ unsigned int len = min_t(unsigned, left, PAGE_SIZE - off);
+
+ while (bio_add_page(bio, page, len, off) != len) {
+ struct bio *prev = bio;
+
+ bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
+ bio_copy_dev(bio, prev);
+ bio->bi_iter.bi_sector = bio_end_sector(prev);
+ bio->bi_opf = prev->bi_opf;
+ bio_chain(prev, bio);
+
+ submit_bio(prev);
+ }
+
+ data += len;
+ left -= len;
+ } while (left > 0);
+
+ error = submit_bio_wait(bio);
+ bio_put(bio);
+
+ if (is_vmalloc && op == REQ_OP_READ)
+ invalidate_kernel_vmap_range(data, count);
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index ce45f066995e..9fa4a7ee8cfc 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -9,17 +9,16 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_bmap_item.h"
#include "xfs_log.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
-#include "xfs_trace.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
@@ -96,15 +95,6 @@ xfs_bui_item_format(
}
/*
- * Pinning has no meaning for an bui item, so just return.
- */
-STATIC void
-xfs_bui_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place an BUI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the BUI transaction has been successfully committed to make it
@@ -123,71 +113,22 @@ xfs_bui_item_unpin(
}
/*
- * BUI items have no locking or pushing. However, since BUIs are pulled from
- * the AIL when their corresponding BUDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the BUI out of
- * the AIL.
- */
-STATIC uint
-xfs_bui_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The BUI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, an BUD isn't going to be
* constructed and thus we free the BUI here directly.
*/
STATIC void
-xfs_bui_item_unlock(
+xfs_bui_item_release(
struct xfs_log_item *lip)
{
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- xfs_bui_release(BUI_ITEM(lip));
-}
-
-/*
- * The BUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_bui_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
+ xfs_bui_release(BUI_ITEM(lip));
}
-/*
- * The BUI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_bui_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all bui log items.
- */
static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_size = xfs_bui_item_size,
.iop_format = xfs_bui_item_format,
- .iop_pin = xfs_bui_item_pin,
.iop_unpin = xfs_bui_item_unpin,
- .iop_unlock = xfs_bui_item_unlock,
- .iop_committed = xfs_bui_item_committed,
- .iop_push = xfs_bui_item_push,
- .iop_committing = xfs_bui_item_committing,
+ .iop_release = xfs_bui_item_release,
};
/*
@@ -249,126 +190,241 @@ xfs_bud_item_format(
}
/*
- * Pinning has no meaning for an bud item, so just return.
+ * The BUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the BUI and free the
+ * BUD.
*/
STATIC void
-xfs_bud_item_pin(
+xfs_bud_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+
+ xfs_bui_release(budp->bud_buip);
+ kmem_zone_free(xfs_bud_zone, budp);
}
-/*
- * Since pinning has no meaning for an bud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_bud_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static const struct xfs_item_ops xfs_bud_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_bud_item_size,
+ .iop_format = xfs_bud_item_format,
+ .iop_release = xfs_bud_item_release,
+};
+
+static struct xfs_bud_log_item *
+xfs_trans_get_bud(
+ struct xfs_trans *tp,
+ struct xfs_bui_log_item *buip)
{
+ struct xfs_bud_log_item *budp;
+
+ budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
+ xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD,
+ &xfs_bud_item_ops);
+ budp->bud_buip = buip;
+ budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+
+ xfs_trans_add_item(tp, &budp->bud_item);
+ return budp;
}
/*
- * There isn't much you can do to push on an bud item. It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Finish an bmap update and log it to the BUD. Note that the
+ * transaction is marked dirty regardless of whether the bmap update
+ * succeeds or fails to support the BUI/BUD lifecycle rules.
*/
-STATIC uint
-xfs_bud_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+static int
+xfs_trans_log_finish_bmap_update(
+ struct xfs_trans *tp,
+ struct xfs_bud_log_item *budp,
+ enum xfs_bmap_intent_type type,
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_fileoff_t startoff,
+ xfs_fsblock_t startblock,
+ xfs_filblks_t *blockcount,
+ xfs_exntst_t state)
{
- return XFS_ITEM_PINNED;
+ int error;
+
+ error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
+ startblock, blockcount, state);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the BUI and frees the BUD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
+
+ return error;
}
-/*
- * The BUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the BUI and free the
- * BUD.
- */
-STATIC void
-xfs_bud_item_unlock(
- struct xfs_log_item *lip)
+/* Sort bmap intents by inode. */
+static int
+xfs_bmap_update_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
{
- struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+ struct xfs_bmap_intent *ba;
+ struct xfs_bmap_intent *bb;
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
- xfs_bui_release(budp->bud_buip);
- kmem_zone_free(xfs_bud_zone, budp);
- }
+ ba = container_of(a, struct xfs_bmap_intent, bi_list);
+ bb = container_of(b, struct xfs_bmap_intent, bi_list);
+ return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
}
-/*
- * When the bud item is committed to disk, all we need to do is delete our
- * reference to our partner bui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_bud_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Get an BUI. */
+STATIC void *
+xfs_bmap_update_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
{
- struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+ struct xfs_bui_log_item *buip;
+
+ ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
+ ASSERT(tp != NULL);
+
+ buip = xfs_bui_init(tp->t_mountp);
+ ASSERT(buip != NULL);
/*
- * Drop the BUI reference regardless of whether the BUD has been
- * aborted. Once the BUD transaction is constructed, it is the sole
- * responsibility of the BUD to release the BUI (even if the BUI is
- * aborted due to log I/O error).
+ * Get a log_item_desc to point at the new item.
*/
- xfs_bui_release(budp->bud_buip);
- kmem_zone_free(xfs_bud_zone, budp);
+ xfs_trans_add_item(tp, &buip->bui_item);
+ return buip;
+}
- return (xfs_lsn_t)-1;
+/* Set the map extent flags for this mapping. */
+static void
+xfs_trans_set_bmap_flags(
+ struct xfs_map_extent *bmap,
+ enum xfs_bmap_intent_type type,
+ int whichfork,
+ xfs_exntst_t state)
+{
+ bmap->me_flags = 0;
+ switch (type) {
+ case XFS_BMAP_MAP:
+ case XFS_BMAP_UNMAP:
+ bmap->me_flags = type;
+ break;
+ default:
+ ASSERT(0);
+ }
+ if (state == XFS_EXT_UNWRITTEN)
+ bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
+ if (whichfork == XFS_ATTR_FORK)
+ bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
}
-/*
- * The BUD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Log bmap updates in the intent item. */
STATIC void
-xfs_bud_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+xfs_bmap_update_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
{
+ struct xfs_bui_log_item *buip = intent;
+ struct xfs_bmap_intent *bmap;
+ uint next_extent;
+ struct xfs_map_extent *map;
+
+ bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
+
+ /*
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
+ */
+ next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
+ ASSERT(next_extent < buip->bui_format.bui_nextents);
+ map = &buip->bui_format.bui_extents[next_extent];
+ map->me_owner = bmap->bi_owner->i_ino;
+ map->me_startblock = bmap->bi_bmap.br_startblock;
+ map->me_startoff = bmap->bi_bmap.br_startoff;
+ map->me_len = bmap->bi_bmap.br_blockcount;
+ xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
+ bmap->bi_bmap.br_state);
}
-/*
- * This is the ops vector shared by all bud log items.
- */
-static const struct xfs_item_ops xfs_bud_item_ops = {
- .iop_size = xfs_bud_item_size,
- .iop_format = xfs_bud_item_format,
- .iop_pin = xfs_bud_item_pin,
- .iop_unpin = xfs_bud_item_unpin,
- .iop_unlock = xfs_bud_item_unlock,
- .iop_committed = xfs_bud_item_committed,
- .iop_push = xfs_bud_item_push,
- .iop_committing = xfs_bud_item_committing,
-};
+/* Get an BUD so we can process all the deferred rmap updates. */
+STATIC void *
+xfs_bmap_update_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_bud(tp, intent);
+}
-/*
- * Allocate and initialize an bud item with the given number of extents.
- */
-struct xfs_bud_log_item *
-xfs_bud_init(
- struct xfs_mount *mp,
- struct xfs_bui_log_item *buip)
+/* Process a deferred rmap update. */
+STATIC int
+xfs_bmap_update_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
+{
+ struct xfs_bmap_intent *bmap;
+ xfs_filblks_t count;
+ int error;
+
+ bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+ count = bmap->bi_bmap.br_blockcount;
+ error = xfs_trans_log_finish_bmap_update(tp, done_item,
+ bmap->bi_type,
+ bmap->bi_owner, bmap->bi_whichfork,
+ bmap->bi_bmap.br_startoff,
+ bmap->bi_bmap.br_startblock,
+ &count,
+ bmap->bi_bmap.br_state);
+ if (!error && count > 0) {
+ ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
+ bmap->bi_bmap.br_blockcount = count;
+ return -EAGAIN;
+ }
+ kmem_free(bmap);
+ return error;
+}
+/* Abort all pending BUIs. */
+STATIC void
+xfs_bmap_update_abort_intent(
+ void *intent)
{
- struct xfs_bud_log_item *budp;
+ xfs_bui_release(intent);
+}
- budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
- xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops);
- budp->bud_buip = buip;
- budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+/* Cancel a deferred rmap update. */
+STATIC void
+xfs_bmap_update_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_bmap_intent *bmap;
- return budp;
+ bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+ kmem_free(bmap);
}
+const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
+ .max_items = XFS_BUI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_bmap_update_diff_items,
+ .create_intent = xfs_bmap_update_create_intent,
+ .abort_intent = xfs_bmap_update_abort_intent,
+ .log_item = xfs_bmap_update_log_item,
+ .create_done = xfs_bmap_update_create_done,
+ .finish_item = xfs_bmap_update_finish_item,
+ .cancel_item = xfs_bmap_update_cancel_item,
+};
+
/*
* Process a bmap update intent item that was recovered from the log.
* We need to update some inode's bmbt.
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index 89e043a88bb8..ad479cc73de8 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -75,8 +75,6 @@ extern struct kmem_zone *xfs_bui_zone;
extern struct kmem_zone *xfs_bud_zone;
struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *);
-struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
- struct xfs_bui_log_item *);
void xfs_bui_item_free(struct xfs_bui_log_item *);
void xfs_bui_release(struct xfs_bui_log_item *);
int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 06d07f1e310b..98c6a7a71427 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -12,12 +12,10 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
-#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
@@ -28,11 +26,8 @@
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_log.h"
-#include "xfs_rmap_btree.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
-#include "xfs_refcount.h"
/* Kernel only BMAP related definitions and functions */
@@ -276,7 +271,7 @@ xfs_bmap_count_tree(
struct xfs_btree_block *block, *nextblock;
int numrecs;
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
+ error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
@@ -287,7 +282,7 @@ xfs_bmap_count_tree(
/* Not at node above leaves, count this level of nodes */
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
while (nextbno != NULLFSBLOCK) {
- error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
+ error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
@@ -321,7 +316,7 @@ xfs_bmap_count_tree(
if (nextbno == NULLFSBLOCK)
break;
bno = nextbno;
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, tp, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 548344e25128..ca0849043f54 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -4,24 +4,9 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
#include <linux/backing-dev.h>
-#include <linux/freezer.h>
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -213,7 +198,7 @@ xfs_buf_free_maps(
}
}
-struct xfs_buf *
+static struct xfs_buf *
_xfs_buf_alloc(
struct xfs_buftarg *target,
struct xfs_buf_map *map,
@@ -243,6 +228,7 @@ _xfs_buf_alloc(
sema_init(&bp->b_sema, 0); /* held, no waiters */
spin_lock_init(&bp->b_lock);
bp->b_target = target;
+ bp->b_mount = target->bt_mount;
bp->b_flags = flags;
/*
@@ -263,12 +249,11 @@ _xfs_buf_alloc(
bp->b_maps[i].bm_len = map[i].bm_len;
bp->b_length += map[i].bm_len;
}
- bp->b_io_length = bp->b_length;
atomic_set(&bp->b_pin_count, 0);
init_waitqueue_head(&bp->b_waiters);
- XFS_STATS_INC(target->bt_mount, xb_create);
+ XFS_STATS_INC(bp->b_mount, xb_create);
trace_xfs_buf_init(bp, _RET_IP_);
return bp;
@@ -425,12 +410,12 @@ retry:
current->comm, current->pid,
__func__, gfp_mask);
- XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
+ XFS_STATS_INC(bp->b_mount, xb_page_retries);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
- XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);
+ XFS_STATS_INC(bp->b_mount, xb_page_found);
nbytes = min_t(size_t, size, PAGE_SIZE - offset);
size -= nbytes;
@@ -909,83 +894,6 @@ xfs_buf_read_uncached(
return 0;
}
-/*
- * Return a buffer allocated as an empty buffer and associated to external
- * memory via xfs_buf_associate_memory() back to it's empty state.
- */
-void
-xfs_buf_set_empty(
- struct xfs_buf *bp,
- size_t numblks)
-{
- if (bp->b_pages)
- _xfs_buf_free_pages(bp);
-
- bp->b_pages = NULL;
- bp->b_page_count = 0;
- bp->b_addr = NULL;
- bp->b_length = numblks;
- bp->b_io_length = numblks;
-
- ASSERT(bp->b_map_count == 1);
- bp->b_bn = XFS_BUF_DADDR_NULL;
- bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
- bp->b_maps[0].bm_len = bp->b_length;
-}
-
-static inline struct page *
-mem_to_page(
- void *addr)
-{
- if ((!is_vmalloc_addr(addr))) {
- return virt_to_page(addr);
- } else {
- return vmalloc_to_page(addr);
- }
-}
-
-int
-xfs_buf_associate_memory(
- xfs_buf_t *bp,
- void *mem,
- size_t len)
-{
- int rval;
- int i = 0;
- unsigned long pageaddr;
- unsigned long offset;
- size_t buflen;
- int page_count;
-
- pageaddr = (unsigned long)mem & PAGE_MASK;
- offset = (unsigned long)mem - pageaddr;
- buflen = PAGE_ALIGN(len + offset);
- page_count = buflen >> PAGE_SHIFT;
-
- /* Free any previous set of page pointers */
- if (bp->b_pages)
- _xfs_buf_free_pages(bp);
-
- bp->b_pages = NULL;
- bp->b_addr = mem;
-
- rval = _xfs_buf_get_pages(bp, page_count);
- if (rval)
- return rval;
-
- bp->b_offset = offset;
-
- for (i = 0; i < bp->b_page_count; i++) {
- bp->b_pages[i] = mem_to_page((void *)pageaddr);
- pageaddr += PAGE_SIZE;
- }
-
- bp->b_io_length = BTOBB(len);
- bp->b_length = BTOBB(buflen);
-
- return 0;
-}
-
xfs_buf_t *
xfs_buf_get_uncached(
struct xfs_buftarg *target,
@@ -1180,7 +1088,7 @@ xfs_buf_lock(
trace_xfs_buf_lock(bp, _RET_IP_);
if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
- xfs_log_force(bp->b_target->bt_mount, 0);
+ xfs_log_force(bp->b_mount, 0);
down(&bp->b_sema);
trace_xfs_buf_lock_done(bp, _RET_IP_);
@@ -1269,7 +1177,7 @@ xfs_buf_ioend_async(
struct xfs_buf *bp)
{
INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
- queue_work(bp->b_ioend_wq, &bp->b_ioend_work);
+ queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
}
void
@@ -1288,7 +1196,7 @@ xfs_buf_ioerror_alert(
struct xfs_buf *bp,
const char *func)
{
- xfs_alert(bp->b_target->bt_mount,
+ xfs_alert(bp->b_mount,
"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
-bp->b_error);
@@ -1307,10 +1215,8 @@ xfs_bwrite(
XBF_WRITE_FAIL | XBF_DONE);
error = xfs_buf_submit(bp);
- if (error) {
- xfs_force_shutdown(bp->b_target->bt_mount,
- SHUTDOWN_META_IO_ERROR);
- }
+ if (error)
+ xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
return error;
}
@@ -1436,21 +1342,8 @@ _xfs_buf_ioapply(
*/
bp->b_error = 0;
- /*
- * Initialize the I/O completion workqueue if we haven't yet or the
- * submitter has not opted to specify a custom one.
- */
- if (!bp->b_ioend_wq)
- bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;
-
if (bp->b_flags & XBF_WRITE) {
op = REQ_OP_WRITE;
- if (bp->b_flags & XBF_SYNCIO)
- op_flags = REQ_SYNC;
- if (bp->b_flags & XBF_FUA)
- op_flags |= REQ_FUA;
- if (bp->b_flags & XBF_FLUSH)
- op_flags |= REQ_PREFLUSH;
/*
* Run the write verifier callback function if it exists. If
@@ -1460,12 +1353,12 @@ _xfs_buf_ioapply(
if (bp->b_ops) {
bp->b_ops->verify_write(bp);
if (bp->b_error) {
- xfs_force_shutdown(bp->b_target->bt_mount,
+ xfs_force_shutdown(bp->b_mount,
SHUTDOWN_CORRUPT_INCORE);
return;
}
} else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
/*
* non-crc filesystems don't attach verifiers during
@@ -1497,7 +1390,7 @@ _xfs_buf_ioapply(
* subsequent call.
*/
offset = bp->b_offset;
- size = BBTOB(bp->b_io_length);
+ size = BBTOB(bp->b_length);
blk_start_plug(&plug);
for (i = 0; i < bp->b_map_count; i++) {
xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
@@ -1543,7 +1436,7 @@ __xfs_buf_submit(
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
/* on shutdown we stale and complete the buffer immediately */
- if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+ if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
xfs_buf_ioerror(bp, -EIO);
bp->b_flags &= ~XBF_DONE;
xfs_buf_stale(bp);
@@ -1613,16 +1506,11 @@ xfs_buf_offset(
return page_address(page) + (offset & (PAGE_SIZE-1));
}
-/*
- * Move data into or out of a buffer.
- */
void
-xfs_buf_iomove(
- xfs_buf_t *bp, /* buffer to process */
- size_t boff, /* starting buffer offset */
- size_t bsize, /* length to copy */
- void *data, /* data address */
- xfs_buf_rw_t mode) /* read/write/zero flag */
+xfs_buf_zero(
+ struct xfs_buf *bp,
+ size_t boff,
+ size_t bsize)
{
size_t bend;
@@ -1635,23 +1523,13 @@ xfs_buf_iomove(
page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
page = bp->b_pages[page_index];
csize = min_t(size_t, PAGE_SIZE - page_offset,
- BBTOB(bp->b_io_length) - boff);
+ BBTOB(bp->b_length) - boff);
ASSERT((csize + page_offset) <= PAGE_SIZE);
- switch (mode) {
- case XBRW_ZERO:
- memset(page_address(page) + page_offset, 0, csize);
- break;
- case XBRW_READ:
- memcpy(data, page_address(page) + page_offset, csize);
- break;
- case XBRW_WRITE:
- memcpy(page_address(page) + page_offset, data, csize);
- }
+ memset(page_address(page) + page_offset, 0, csize);
boff += csize;
- data += csize;
}
}
@@ -2198,8 +2076,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
* This allows userspace to disrupt buffer caching for debug/testing
* purposes.
*/
- if (XFS_TEST_ERROR(false, bp->b_target->bt_mount,
- XFS_ERRTAG_BUF_LRU_REF))
+ if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
lru_ref = 0;
atomic_set(&bp->b_lru_ref, lru_ref);
@@ -2215,7 +2092,7 @@ xfs_verify_magic(
struct xfs_buf *bp,
__be32 dmagic)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
int idx;
idx = xfs_sb_version_hascrc(&mp->m_sb);
@@ -2233,7 +2110,7 @@ xfs_verify_magic16(
struct xfs_buf *bp,
__be16 dmagic)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
int idx;
idx = xfs_sb_version_hascrc(&mp->m_sb);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d0b96e071cec..c6e57a3f409e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -21,12 +21,6 @@
#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
-typedef enum {
- XBRW_READ = 1, /* transfer into target memory */
- XBRW_WRITE = 2, /* transfer from target memory */
- XBRW_ZERO = 3, /* Zero target memory */
-} xfs_buf_rw_t;
-
#define XBF_READ (1 << 0) /* buffer intended for reading from device */
#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
@@ -34,12 +28,7 @@ typedef enum {
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
-#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA (1 << 11)/* force cache write through mode */
-#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
+#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */
/* flags used only as arguments to access routines */
#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
@@ -49,7 +38,6 @@ typedef enum {
#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
-#define _XBF_COMPOUND (1 << 23)/* compound buffer */
typedef unsigned int xfs_buf_flags_t;
@@ -62,15 +50,11 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_DONE, "DONE" }, \
{ XBF_STALE, "STALE" }, \
{ XBF_WRITE_FAIL, "WRITE_FAIL" }, \
- { XBF_SYNCIO, "SYNCIO" }, \
- { XBF_FUA, "FUA" }, \
- { XBF_FLUSH, "FLUSH" }, \
{ XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
{ XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
- { _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_COMPOUND, "COMPOUND" }
+ { _XBF_DELWRI_Q, "DELWRI_Q" }
/*
@@ -161,13 +145,13 @@ typedef struct xfs_buf {
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag; /* contains rbtree root */
+ struct xfs_mount *b_mount;
xfs_buftarg_t *b_target; /* buffer target (device) */
void *b_addr; /* virtual address of buffer */
struct work_struct b_ioend_work;
- struct workqueue_struct *b_ioend_wq; /* I/O completion wq */
xfs_buf_iodone_t b_iodone; /* I/O completion function */
struct completion b_iowait; /* queue for I/O waiters */
- void *b_log_item;
+ struct xfs_buf_log_item *b_log_item;
struct list_head b_li_list; /* Log items list head */
struct xfs_trans *b_transp;
struct page **b_pages; /* array of page pointers */
@@ -175,7 +159,6 @@ typedef struct xfs_buf {
struct xfs_buf_map *b_maps; /* compound buffer map */
struct xfs_buf_map __b_map; /* inline compound buffer map */
int b_map_count;
- int b_io_length; /* IO size in BBs */
atomic_t b_pin_count; /* pin count */
atomic_t b_io_remaining; /* #outstanding I/O requests */
unsigned int b_page_count; /* size of page array */
@@ -209,21 +192,6 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
xfs_daddr_t blkno, size_t numblks,
xfs_buf_flags_t flags);
-struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target,
- struct xfs_buf_map *map, int nmaps,
- xfs_buf_flags_t flags);
-
-static inline struct xfs_buf *
-xfs_buf_alloc(
- struct xfs_buftarg *target,
- xfs_daddr_t blkno,
- size_t numblks,
- xfs_buf_flags_t flags)
-{
- DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
- return _xfs_buf_alloc(target, &map, 1, flags);
-}
-
struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
struct xfs_buf_map *map, int nmaps,
xfs_buf_flags_t flags);
@@ -239,11 +207,10 @@ static inline struct xfs_buf *
xfs_buf_get(
struct xfs_buftarg *target,
xfs_daddr_t blkno,
- size_t numblks,
- xfs_buf_flags_t flags)
+ size_t numblks)
{
DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
- return xfs_buf_get_map(target, &map, 1, flags);
+ return xfs_buf_get_map(target, &map, 1, 0);
}
static inline struct xfs_buf *
@@ -269,9 +236,6 @@ xfs_buf_readahead(
return xfs_buf_readahead_map(target, &map, 1, ops);
}
-void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
-int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
-
struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
int flags);
int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
@@ -305,10 +269,7 @@ static inline int xfs_buf_submit(struct xfs_buf *bp)
return __xfs_buf_submit(bp, wait);
}
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
- xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
- xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
/* Buffer Utility Routines */
extern void *xfs_buf_offset(struct xfs_buf *, size_t);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 65b32acfa0f6..7dcaec54a20b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -5,19 +5,17 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
-#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_log.h"
-#include "xfs_inode.h"
kmem_zone_t *xfs_buf_item_zone;
@@ -520,7 +518,7 @@ xfs_buf_item_push(
/* has a previous flush failed due to IO errors? */
if ((bp->b_flags & XBF_WRITE_FAIL) &&
___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
- xfs_warn(bp->b_target->bt_mount,
+ xfs_warn(bp->b_mount,
"Failing async write on buffer block 0x%llx. Retrying async write.",
(long long)bp->b_bn);
}
@@ -594,7 +592,7 @@ xfs_buf_item_put(
* free the item.
*/
STATIC void
-xfs_buf_item_unlock(
+xfs_buf_item_release(
struct xfs_log_item *lip)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
@@ -609,7 +607,7 @@ xfs_buf_item_unlock(
&lip->li_flags);
#endif
- trace_xfs_buf_item_unlock(bip);
+ trace_xfs_buf_item_release(bip);
/*
* The bli dirty state should match whether the blf has logged segments
@@ -639,6 +637,14 @@ xfs_buf_item_unlock(
xfs_buf_relse(bp);
}
+STATIC void
+xfs_buf_item_committing(
+ struct xfs_log_item *lip,
+ xfs_lsn_t commit_lsn)
+{
+ return xfs_buf_item_release(lip);
+}
+
/*
* This is called to find out where the oldest active copy of the
* buf log item in the on disk log resides now that the last log
@@ -671,25 +677,15 @@ xfs_buf_item_committed(
return lsn;
}
-STATIC void
-xfs_buf_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all buf log items.
- */
static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_size = xfs_buf_item_size,
.iop_format = xfs_buf_item_format,
.iop_pin = xfs_buf_item_pin,
.iop_unpin = xfs_buf_item_unpin,
- .iop_unlock = xfs_buf_item_unlock,
+ .iop_release = xfs_buf_item_release,
+ .iop_committing = xfs_buf_item_committing,
.iop_committed = xfs_buf_item_committed,
.iop_push = xfs_buf_item_push,
- .iop_committing = xfs_buf_item_committing
};
STATIC int
@@ -743,7 +739,7 @@ xfs_buf_item_init(
* this buffer. If we do already have one, there is
* nothing to do here so return.
*/
- ASSERT(bp->b_target->bt_mount == mp);
+ ASSERT(bp->b_mount == mp);
if (bip) {
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(!bp->b_transp);
@@ -980,9 +976,9 @@ xfs_buf_item_relse(
*/
void
xfs_buf_attach_iodone(
- xfs_buf_t *bp,
- void (*cb)(xfs_buf_t *, xfs_log_item_t *),
- xfs_log_item_t *lip)
+ struct xfs_buf *bp,
+ void (*cb)(struct xfs_buf *, struct xfs_log_item *),
+ struct xfs_log_item *lip)
{
ASSERT(xfs_buf_islocked(bp));
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 90f65f891fab..4a054b11011a 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -39,7 +39,7 @@ struct xfs_buf_log_item;
* locked, and which 128 byte chunks of the buffer are dirty.
*/
struct xfs_buf_log_item {
- xfs_log_item_t bli_item; /* common item structure */
+ struct xfs_log_item bli_item; /* common item structure */
struct xfs_buf *bli_buf; /* real buffer pointer */
unsigned int bli_flags; /* misc flags */
unsigned int bli_recur; /* lock recursion count */
@@ -55,8 +55,8 @@ bool xfs_buf_item_put(struct xfs_buf_log_item *);
void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_attach_iodone(struct xfs_buf *,
- void(*)(struct xfs_buf *, xfs_log_item_t *),
- xfs_log_item_t *);
+ void(*)(struct xfs_buf *, struct xfs_log_item *),
+ struct xfs_log_item *);
void xfs_buf_iodone_callbacks(struct xfs_buf *);
void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 5142e64e2345..283df898dd9f 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -6,17 +6,14 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index d0df0ed50f4b..8ec7aab89044 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -4,19 +4,17 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
-#include "xfs_discard.h"
#include "xfs_trace.h"
#include "xfs_log.h"
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a1af984e4913..fb1ad4483081 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -14,16 +14,12 @@
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_alloc.h"
#include "xfs_quota.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
@@ -1243,7 +1239,7 @@ xfs_qm_exit(void)
/*
* Iterate every dquot of a particular type. The caller must ensure that the
* particular quota type is active. iter_fn can return negative error codes,
- * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating.
+ * or XFS_ITER_ABORT to indicate that it wants to stop iterating.
*/
int
xfs_qm_dqiterate(
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 64bd8640f6e8..4fe85709d55d 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -34,7 +34,6 @@ typedef struct xfs_dquot {
uint dq_flags; /* various flags (XFS_DQ_*) */
struct list_head q_lru; /* global free list of dquots */
struct xfs_mount*q_mount; /* filesystem this relates to */
- struct xfs_trans*q_transp; /* trans this belongs to currently */
uint q_nrefs; /* # active refs from inodes */
xfs_daddr_t q_blkno; /* blkno of dquot buffer */
int q_bufoffset; /* off of dq in buffer (# dquots) */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 7dedd17c4813..282ec5af293e 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -5,13 +5,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_quota.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
@@ -94,18 +94,6 @@ xfs_qm_dquot_logitem_unpin(
wake_up(&dqp->q_pinwait);
}
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- /*
- * We always re-log the entire dquot when it becomes dirty,
- * so, the latest copy _is_ the only one that matters.
- */
- return lsn;
-}
-
/*
* This is called to wait for the given dquot to be unpinned.
* Most of these pin/unpin routines are plagiarized from inode code.
@@ -209,14 +197,8 @@ out_unlock:
return rval;
}
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction. If the
- * hold flags is set, do not unlock the dquot.
- */
STATIC void
-xfs_qm_dquot_logitem_unlock(
+xfs_qm_dquot_logitem_release(
struct xfs_log_item *lip)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
@@ -224,11 +206,6 @@ xfs_qm_dquot_logitem_unlock(
ASSERT(XFS_DQ_IS_LOCKED(dqp));
/*
- * Clear the transaction pointer in the dquot
- */
- dqp->q_transp = NULL;
-
- /*
* dquots are never 'held' from getting unlocked at the end of
* a transaction. Their locking and unlocking is hidden inside the
* transaction layer, within trans_commit. Hence, no LI_HOLD flag
@@ -237,30 +214,22 @@ xfs_qm_dquot_logitem_unlock(
xfs_dqunlock(dqp);
}
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
STATIC void
xfs_qm_dquot_logitem_committing(
struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+ xfs_lsn_t commit_lsn)
{
+ return xfs_qm_dquot_logitem_release(lip);
}
-/*
- * This is the ops vector for dquots
- */
static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = xfs_qm_dquot_logitem_size,
.iop_format = xfs_qm_dquot_logitem_format,
.iop_pin = xfs_qm_dquot_logitem_pin,
.iop_unpin = xfs_qm_dquot_logitem_unpin,
- .iop_unlock = xfs_qm_dquot_logitem_unlock,
- .iop_committed = xfs_qm_dquot_logitem_committed,
+ .iop_release = xfs_qm_dquot_logitem_release,
+ .iop_committing = xfs_qm_dquot_logitem_committing,
.iop_push = xfs_qm_dquot_logitem_push,
- .iop_committing = xfs_qm_dquot_logitem_committing,
.iop_error = xfs_dquot_item_error
};
@@ -320,26 +289,6 @@ xfs_qm_qoff_logitem_format(
}
/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
- struct xfs_log_item *lip,
- int remove)
-{
-}
-
-/*
* There isn't much you can do to push a quotaoff item. It is simply
* stuck waiting for the log to be flushed to disk.
*/
@@ -351,28 +300,6 @@ xfs_qm_qoff_logitem_push(
return XFS_ITEM_LOCKED;
}
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
- struct xfs_log_item *lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
STATIC xfs_lsn_t
xfs_qm_qoffend_logitem_committed(
struct xfs_log_item *lip,
@@ -396,50 +323,17 @@ xfs_qm_qoffend_logitem_committed(
return (xfs_lsn_t)-1;
}
-/*
- * XXX rcc - don't know quite what to do with this. I think we can
- * just ignore it. The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen? Also, do we need different routines for
- * quotaoff start and quotaoff end? I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable). If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn)
-{
-}
-
static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
.iop_size = xfs_qm_qoff_logitem_size,
.iop_format = xfs_qm_qoff_logitem_format,
- .iop_pin = xfs_qm_qoff_logitem_pin,
- .iop_unpin = xfs_qm_qoff_logitem_unpin,
- .iop_unlock = xfs_qm_qoff_logitem_unlock,
.iop_committed = xfs_qm_qoffend_logitem_committed,
.iop_push = xfs_qm_qoff_logitem_push,
- .iop_committing = xfs_qm_qoff_logitem_committing
};
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
.iop_size = xfs_qm_qoff_logitem_size,
.iop_format = xfs_qm_qoff_logitem_format,
- .iop_pin = xfs_qm_qoff_logitem_pin,
- .iop_unpin = xfs_qm_qoff_logitem_unpin,
- .iop_unlock = xfs_qm_qoff_logitem_unlock,
- .iop_committed = xfs_qm_qoff_logitem_committed,
.iop_push = xfs_qm_qoff_logitem_push,
- .iop_committing = xfs_qm_qoff_logitem_committing
};
/*
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index db9df710a308..1aed34ccdabc 100644
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -12,13 +12,13 @@ struct xfs_mount;
struct xfs_qoff_logitem;
typedef struct xfs_dq_logitem {
- xfs_log_item_t qli_item; /* common portion */
+ struct xfs_log_item qli_item; /* common portion */
struct xfs_dquot *qli_dquot; /* dquot ptr */
xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
} xfs_dq_logitem_t;
typedef struct xfs_qoff_logitem {
- xfs_log_item_t qql_item; /* common portion */
+ struct xfs_log_item qql_item; /* common portion */
struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
unsigned int qql_flags;
} xfs_qoff_logitem_t;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index a1e177f66404..544c9482a0ef 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -4,6 +4,7 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_fs.h"
#include "xfs_log_format.h"
@@ -353,7 +354,7 @@ xfs_buf_verifier_error(
size_t bufsz,
xfs_failaddr_t failaddr)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
int sz;
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index f2284ceb129f..f1372f9046e3 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -4,18 +4,16 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_export.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
#include "xfs_pnfs.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 74ddf66f4cfe..86f6512d6864 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -9,14 +9,18 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
+#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_extfree_item.h"
#include "xfs_log.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_trace.h"
kmem_zone_t *xfs_efi_zone;
@@ -107,15 +111,6 @@ xfs_efi_item_format(
/*
- * Pinning has no meaning for an efi item, so just return.
- */
-STATIC void
-xfs_efi_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place an EFI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the EFI transaction has been successfully committed to make it
@@ -133,71 +128,22 @@ xfs_efi_item_unpin(
}
/*
- * Efi items have no locking or pushing. However, since EFIs are pulled from
- * the AIL when their corresponding EFDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the EFI out of
- * the AIL.
- */
-STATIC uint
-xfs_efi_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The EFI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, an EFD isn't going to be
* constructed and thus we free the EFI here directly.
*/
STATIC void
-xfs_efi_item_unlock(
+xfs_efi_item_release(
struct xfs_log_item *lip)
{
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- xfs_efi_release(EFI_ITEM(lip));
-}
-
-/*
- * The EFI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_efi_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
-/*
- * The EFI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_efi_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
+ xfs_efi_release(EFI_ITEM(lip));
}
-/*
- * This is the ops vector shared by all efi log items.
- */
static const struct xfs_item_ops xfs_efi_item_ops = {
.iop_size = xfs_efi_item_size,
.iop_format = xfs_efi_item_format,
- .iop_pin = xfs_efi_item_pin,
.iop_unpin = xfs_efi_item_unpin,
- .iop_unlock = xfs_efi_item_unlock,
- .iop_committed = xfs_efi_item_committed,
- .iop_push = xfs_efi_item_push,
- .iop_committing = xfs_efi_item_committing
+ .iop_release = xfs_efi_item_release,
};
@@ -349,136 +295,298 @@ xfs_efd_item_format(
}
/*
- * Pinning has no meaning for an efd item, so just return.
+ * The EFD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the EFI and free the EFD.
*/
STATIC void
-xfs_efd_item_pin(
+xfs_efd_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
+ xfs_efi_release(efdp->efd_efip);
+ xfs_efd_item_free(efdp);
}
+static const struct xfs_item_ops xfs_efd_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_efd_item_size,
+ .iop_format = xfs_efd_item_format,
+ .iop_release = xfs_efd_item_release,
+};
+
/*
- * Since pinning has no meaning for an efd item, unpinning does
- * not either.
+ * Allocate an "extent free done" log item that will hold nextents worth of
+ * extents. The caller must use all nextents extents, because we are not
+ * flexible about this at all.
*/
-STATIC void
-xfs_efd_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static struct xfs_efd_log_item *
+xfs_trans_get_efd(
+ struct xfs_trans *tp,
+ struct xfs_efi_log_item *efip,
+ unsigned int nextents)
{
+ struct xfs_efd_log_item *efdp;
+
+ ASSERT(nextents > 0);
+
+ if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
+ efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
+ (nextents - 1) * sizeof(struct xfs_extent),
+ KM_SLEEP);
+ } else {
+ efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
+ }
+
+ xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
+ &xfs_efd_item_ops);
+ efdp->efd_efip = efip;
+ efdp->efd_format.efd_nextents = nextents;
+ efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
+
+ xfs_trans_add_item(tp, &efdp->efd_item);
+ return efdp;
}
/*
- * There isn't much you can do to push on an efd item. It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Free an extent and log it to the EFD. Note that the transaction is marked
+ * dirty regardless of whether the extent free succeeds or fails to support the
+ * EFI/EFD lifecycle rules.
*/
-STATIC uint
-xfs_efd_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+static int
+xfs_trans_free_extent(
+ struct xfs_trans *tp,
+ struct xfs_efd_log_item *efdp,
+ xfs_fsblock_t start_block,
+ xfs_extlen_t ext_len,
+ const struct xfs_owner_info *oinfo,
+ bool skip_discard)
{
- return XFS_ITEM_PINNED;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent *extp;
+ uint next_extent;
+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
+ start_block);
+ int error;
+
+ trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
+
+ error = __xfs_free_extent(tp, start_block, ext_len,
+ oinfo, XFS_AG_RESV_NONE, skip_discard);
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the EFI and frees the EFD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+
+ next_extent = efdp->efd_next_extent;
+ ASSERT(next_extent < efdp->efd_format.efd_nextents);
+ extp = &(efdp->efd_format.efd_extents[next_extent]);
+ extp->ext_start = start_block;
+ extp->ext_len = ext_len;
+ efdp->efd_next_extent++;
+
+ return error;
}
-/*
- * The EFD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the EFI and free the EFD.
- */
-STATIC void
-xfs_efd_item_unlock(
- struct xfs_log_item *lip)
+/* Sort bmap items by AG. */
+static int
+xfs_extent_free_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
{
- struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+ struct xfs_mount *mp = priv;
+ struct xfs_extent_free_item *ra;
+ struct xfs_extent_free_item *rb;
+
+ ra = container_of(a, struct xfs_extent_free_item, xefi_list);
+ rb = container_of(b, struct xfs_extent_free_item, xefi_list);
+ return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
+ XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
+}
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
- xfs_efi_release(efdp->efd_efip);
- xfs_efd_item_free(efdp);
- }
+/* Get an EFI. */
+STATIC void *
+xfs_extent_free_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
+{
+ struct xfs_efi_log_item *efip;
+
+ ASSERT(tp != NULL);
+ ASSERT(count > 0);
+
+ efip = xfs_efi_init(tp->t_mountp, count);
+ ASSERT(efip != NULL);
+
+ /*
+ * Get a log_item_desc to point at the new item.
+ */
+ xfs_trans_add_item(tp, &efip->efi_item);
+ return efip;
}
-/*
- * When the efd item is committed to disk, all we need to do is delete our
- * reference to our partner efi item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from further
- * referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_efd_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Log a free extent to the intent item. */
+STATIC void
+xfs_extent_free_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
{
- struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+ struct xfs_efi_log_item *efip = intent;
+ struct xfs_extent_free_item *free;
+ uint next_extent;
+ struct xfs_extent *extp;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
/*
- * Drop the EFI reference regardless of whether the EFD has been
- * aborted. Once the EFD transaction is constructed, it is the sole
- * responsibility of the EFD to release the EFI (even if the EFI is
- * aborted due to log I/O error).
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
*/
- xfs_efi_release(efdp->efd_efip);
- xfs_efd_item_free(efdp);
+ next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
+ ASSERT(next_extent < efip->efi_format.efi_nextents);
+ extp = &efip->efi_format.efi_extents[next_extent];
+ extp->ext_start = free->xefi_startblock;
+ extp->ext_len = free->xefi_blockcount;
+}
- return (xfs_lsn_t)-1;
+/* Get an EFD so we can process all the free extents. */
+STATIC void *
+xfs_extent_free_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_efd(tp, intent, count);
}
-/*
- * The EFD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Process a free extent. */
+STATIC int
+xfs_extent_free_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
+{
+ struct xfs_extent_free_item *free;
+ int error;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+ error = xfs_trans_free_extent(tp, done_item,
+ free->xefi_startblock,
+ free->xefi_blockcount,
+ &free->xefi_oinfo, free->xefi_skip_discard);
+ kmem_free(free);
+ return error;
+}
+
+/* Abort all pending EFIs. */
STATIC void
-xfs_efd_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+xfs_extent_free_abort_intent(
+ void *intent)
{
+ xfs_efi_release(intent);
}
-/*
- * This is the ops vector shared by all efd log items.
- */
-static const struct xfs_item_ops xfs_efd_item_ops = {
- .iop_size = xfs_efd_item_size,
- .iop_format = xfs_efd_item_format,
- .iop_pin = xfs_efd_item_pin,
- .iop_unpin = xfs_efd_item_unpin,
- .iop_unlock = xfs_efd_item_unlock,
- .iop_committed = xfs_efd_item_committed,
- .iop_push = xfs_efd_item_push,
- .iop_committing = xfs_efd_item_committing
+/* Cancel a free extent. */
+STATIC void
+xfs_extent_free_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_extent_free_item *free;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+ kmem_free(free);
+}
+
+const struct xfs_defer_op_type xfs_extent_free_defer_type = {
+ .max_items = XFS_EFI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_extent_free_diff_items,
+ .create_intent = xfs_extent_free_create_intent,
+ .abort_intent = xfs_extent_free_abort_intent,
+ .log_item = xfs_extent_free_log_item,
+ .create_done = xfs_extent_free_create_done,
+ .finish_item = xfs_extent_free_finish_item,
+ .cancel_item = xfs_extent_free_cancel_item,
};
/*
- * Allocate and initialize an efd item with the given number of extents.
+ * AGFL blocks are accounted differently in the reserve pools and are not
+ * inserted into the busy extent list.
*/
-struct xfs_efd_log_item *
-xfs_efd_init(
- struct xfs_mount *mp,
- struct xfs_efi_log_item *efip,
- uint nextents)
-
+STATIC int
+xfs_agfl_free_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
{
- struct xfs_efd_log_item *efdp;
- uint size;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_efd_log_item *efdp = done_item;
+ struct xfs_extent_free_item *free;
+ struct xfs_extent *extp;
+ struct xfs_buf *agbp;
+ int error;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ uint next_extent;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+ ASSERT(free->xefi_blockcount == 1);
+ agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
+ agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+
+ trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
+
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+ if (!error)
+ error = xfs_free_agfl_block(tp, agno, agbno, agbp,
+ &free->xefi_oinfo);
- ASSERT(nextents > 0);
- if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
- size = (uint)(sizeof(xfs_efd_log_item_t) +
- ((nextents - 1) * sizeof(xfs_extent_t)));
- efdp = kmem_zalloc(size, KM_SLEEP);
- } else {
- efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
- }
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the EFI and frees the EFD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
- xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
- efdp->efd_efip = efip;
- efdp->efd_format.efd_nextents = nextents;
- efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
+ next_extent = efdp->efd_next_extent;
+ ASSERT(next_extent < efdp->efd_format.efd_nextents);
+ extp = &(efdp->efd_format.efd_extents[next_extent]);
+ extp->ext_start = free->xefi_startblock;
+ extp->ext_len = free->xefi_blockcount;
+ efdp->efd_next_extent++;
- return efdp;
+ kmem_free(free);
+ return error;
}
+/* sub-type with special handling for AGFL deferred frees */
+const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
+ .max_items = XFS_EFI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_extent_free_diff_items,
+ .create_intent = xfs_extent_free_create_intent,
+ .abort_intent = xfs_extent_free_abort_intent,
+ .log_item = xfs_extent_free_log_item,
+ .create_done = xfs_extent_free_create_done,
+ .finish_item = xfs_agfl_free_finish_item,
+ .cancel_item = xfs_extent_free_cancel_item,
+};
+
/*
* Process an extent free intent item that was recovered from
* the log. We need to free the extents that it describes.
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 2a6a895ca73e..16aaab06d4ec 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -51,7 +51,7 @@ struct kmem_zone;
* AIL, so at this point both the EFI and EFD are freed.
*/
typedef struct xfs_efi_log_item {
- xfs_log_item_t efi_item;
+ struct xfs_log_item efi_item;
atomic_t efi_refcount;
atomic_t efi_next_extent;
unsigned long efi_flags; /* misc flags */
@@ -64,7 +64,7 @@ typedef struct xfs_efi_log_item {
* have been freed.
*/
typedef struct xfs_efd_log_item {
- xfs_log_item_t efd_item;
+ struct xfs_log_item efd_item;
xfs_efi_log_item_t *efd_efip;
uint efd_next_extent;
xfs_efd_log_format_t efd_format;
@@ -79,8 +79,6 @@ extern struct kmem_zone *xfs_efi_zone;
extern struct kmem_zone *xfs_efd_zone;
xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint);
-xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
- uint);
int xfs_efi_copy_format(xfs_log_iovec_t *buf,
xfs_efi_log_format_t *dst_efi_fmt);
void xfs_efi_item_free(xfs_efi_log_item_t *);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 76748255f843..e93bacbd49ae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -10,14 +10,11 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
@@ -28,9 +25,7 @@
#include "xfs_iomap.h"
#include "xfs_reflink.h"
-#include <linux/dcache.h>
#include <linux/falloc.h>
-#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/mman.h>
@@ -367,20 +362,7 @@ restart:
* lock above. Eventually we should look into a way to avoid
* the pointless lock roundtrip.
*/
- if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
- error = file_update_time(file);
- if (error)
- return error;
- }
-
- /*
- * If we're writing the file then make sure to clear the setuid and
- * setgid bits if the process is not being run by root. This keeps
- * people from modifying setuid and setgid binaries.
- */
- if (!IS_NOSEC(inode))
- return file_remove_privs(file);
- return 0;
+ return file_modified(file);
}
static int
@@ -392,6 +374,7 @@ xfs_dio_write_end_io(
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
+ unsigned int nofs_flag;
int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
@@ -408,10 +391,17 @@ xfs_dio_write_end_io(
*/
XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
+ /*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
+
if (flags & IOMAP_DIO_COW) {
error = xfs_reflink_end_cow(ip, offset, size);
if (error)
- return error;
+ goto out;
}
/*
@@ -420,8 +410,10 @@ xfs_dio_write_end_io(
* earlier allows a racing dio read to find unwritten extents before
* they are converted.
*/
- if (flags & IOMAP_DIO_UNWRITTEN)
- return xfs_iomap_write_unwritten(ip, offset, size, true);
+ if (flags & IOMAP_DIO_UNWRITTEN) {
+ error = xfs_iomap_write_unwritten(ip, offset, size, true);
+ goto out;
+ }
/*
* We need to update the in-core inode size here so that we don't end up
@@ -443,6 +435,8 @@ xfs_dio_write_end_io(
spin_unlock(&ip->i_flags_lock);
}
+out:
+ memalloc_nofs_restore(nofs_flag);
return error;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 182501373af2..574a7a8b4736 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -5,22 +5,19 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_alloc.h"
#include "xfs_mru_cache.h"
-#include "xfs_filestream.h"
#include "xfs_trace.h"
#include "xfs_ag_resv.h"
#include "xfs_trans.h"
-#include "xfs_shared.h"
struct xfs_fstrm_item {
struct xfs_mru_cache_elem mru;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 3d76a9e35870..5a8f9641562a 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -9,16 +9,12 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_error.h"
#include "xfs_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
#include "xfs_bit.h"
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 3d0e0570e3aa..3e61d0cc23f8 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -11,15 +11,11 @@
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_error.h"
-#include "xfs_btree.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans_space.h"
-#include "xfs_rtalloc.h"
-#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
@@ -251,9 +247,9 @@ xfs_growfs_data(
if (mp->m_sb.sb_imax_pct) {
uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
do_div(icount, 100);
- mp->m_maxicount = XFS_FSB_TO_INO(mp, icount);
+ M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount);
} else
- mp->m_maxicount = 0;
+ M_IGEO(mp)->maxicount = 0;
/* Update secondary superblocks now the physical grow has completed */
error = xfs_update_secondary_sbs(mp);
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index d0d377384120..fa55ab8b8d80 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -4,7 +4,6 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include "xfs_sysctl.h"
/*
* Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
@@ -41,4 +40,7 @@ struct xfs_globals xfs_globals = {
#else
.bug_on_assert = false, /* assert failures WARN() */
#endif
+#ifdef DEBUG
+ .pwork_threads = -1, /* automatic thread detection */
+#endif
};
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index 4c4929f9e7bf..8e0cb05a7142 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -9,12 +9,8 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trace.h"
#include "xfs_health.h"
@@ -373,7 +369,7 @@ static const struct ioctl_sick_map ino_map[] = {
void
xfs_bulkstat_health(
struct xfs_inode *ip,
- struct xfs_bstat *bs)
+ struct xfs_bulkstat *bs)
{
const struct ioctl_sick_map *m;
unsigned int sick;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index a76b27565a18..0b0fd10a36d4 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -5,13 +5,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
@@ -23,8 +23,6 @@
#include "xfs_dquot.h"
#include "xfs_reflink.h"
-#include <linux/kthread.h>
-#include <linux/freezer.h>
#include <linux/iversion.h>
/*
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 8381d34cb102..d99a0a3e5f40 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -6,14 +6,9 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
-#include "xfs_format.h"
#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_error.h"
#include "xfs_icreate_item.h"
#include "xfs_log.h"
@@ -56,80 +51,18 @@ xfs_icreate_item_format(
sizeof(struct xfs_icreate_log));
}
-
-/* Pinning has no meaning for the create item, so just return. */
STATIC void
-xfs_icreate_item_pin(
+xfs_icreate_item_release(
struct xfs_log_item *lip)
{
+ kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip));
}
-
-/* pinning has no meaning for the create item, so just return. */
-STATIC void
-xfs_icreate_item_unpin(
- struct xfs_log_item *lip,
- int remove)
-{
-}
-
-STATIC void
-xfs_icreate_item_unlock(
- struct xfs_log_item *lip)
-{
- struct xfs_icreate_item *icp = ICR_ITEM(lip);
-
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- kmem_zone_free(xfs_icreate_zone, icp);
- return;
-}
-
-/*
- * Because we have ordered buffers being tracked in the AIL for the inode
- * creation, we don't need the create item after this. Hence we can free
- * the log item and return -1 to tell the caller we're done with the item.
- */
-STATIC xfs_lsn_t
-xfs_icreate_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- struct xfs_icreate_item *icp = ICR_ITEM(lip);
-
- kmem_zone_free(xfs_icreate_zone, icp);
- return (xfs_lsn_t)-1;
-}
-
-/* item can never get into the AIL */
-STATIC uint
-xfs_icreate_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- ASSERT(0);
- return XFS_ITEM_SUCCESS;
-}
-
-/* Ordered buffers do the dependency tracking here, so this does nothing. */
-STATIC void
-xfs_icreate_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all buf log items.
- */
static const struct xfs_item_ops xfs_icreate_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
.iop_size = xfs_icreate_item_size,
.iop_format = xfs_icreate_item_format,
- .iop_pin = xfs_icreate_item_pin,
- .iop_unpin = xfs_icreate_item_unpin,
- .iop_push = xfs_icreate_item_push,
- .iop_unlock = xfs_icreate_item_unlock,
- .iop_committed = xfs_icreate_item_committed,
- .iop_committing = xfs_icreate_item_committing,
+ .iop_release = xfs_icreate_item_release,
};
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 71d216cf6f87..6467d5e1df2d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3,7 +3,6 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/log2.h>
#include <linux/iversion.h>
#include "xfs.h"
@@ -16,10 +15,7 @@
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
-#include "xfs_attr_sf.h"
#include "xfs_attr.h"
#include "xfs_trans_space.h"
#include "xfs_trans.h"
@@ -32,7 +28,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_filestream.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
@@ -40,7 +35,6 @@
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
-#include "xfs_dir2_priv.h"
kmem_zone_t *xfs_inode_zone;
@@ -441,12 +435,12 @@ xfs_lock_inumorder(int lock_mode, int subclass)
*/
static void
xfs_lock_inodes(
- xfs_inode_t **ips,
- int inodes,
- uint lock_mode)
+ struct xfs_inode **ips,
+ int inodes,
+ uint lock_mode)
{
- int attempts = 0, i, j, try_lock;
- xfs_log_item_t *lp;
+ int attempts = 0, i, j, try_lock;
+ struct xfs_log_item *lp;
/*
* Currently supports between 2 and 5 inodes with exclusive locking. We
@@ -485,7 +479,7 @@ again:
*/
if (!try_lock) {
for (j = (i - 1); j >= 0 && !try_lock; j--) {
- lp = (xfs_log_item_t *)ips[j]->i_itemp;
+ lp = &ips[j]->i_itemp->ili_item;
if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
try_lock++;
}
@@ -551,7 +545,7 @@ xfs_lock_two_inodes(
struct xfs_inode *temp;
uint mode_temp;
int attempts = 0;
- xfs_log_item_t *lp;
+ struct xfs_log_item *lp;
ASSERT(hweight32(ip0_mode) == 1);
ASSERT(hweight32(ip1_mode) == 1);
@@ -585,7 +579,7 @@ xfs_lock_two_inodes(
* the second lock. If we can't get it, we must release the first one
* and try again.
*/
- lp = (xfs_log_item_t *)ip0->i_itemp;
+ lp = &ip0->i_itemp->ili_item;
if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
xfs_iunlock(ip0, ip0_mode);
@@ -2537,13 +2531,14 @@ xfs_ifree_cluster(
xfs_inode_log_item_t *iip;
struct xfs_log_item *lip;
struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
xfs_ino_t inum;
inum = xic->first_ino;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
- nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster;
+ nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
- for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) {
+ for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
/*
* The allocation bitmap tells us which inodes of the chunk were
* physically allocated. Skip the cluster if an inode falls into
@@ -2551,7 +2546,7 @@ xfs_ifree_cluster(
*/
ioffset = inum - xic->first_ino;
if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
- ASSERT(ioffset % mp->m_inodes_per_cluster == 0);
+ ASSERT(ioffset % igeo->inodes_per_cluster == 0);
continue;
}
@@ -2567,7 +2562,7 @@ xfs_ifree_cluster(
* to mark all the active inodes on the buffer stale.
*/
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * mp->m_blocks_per_cluster,
+ mp->m_bsize * igeo->blocks_per_cluster,
XBF_UNMAPPED);
if (!bp)
@@ -2614,7 +2609,7 @@ xfs_ifree_cluster(
* transaction stale above, which means there is no point in
* even trying to lock them.
*/
- for (i = 0; i < mp->m_inodes_per_cluster; i++) {
+ for (i = 0; i < igeo->inodes_per_cluster; i++) {
retry:
rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root,
@@ -3472,28 +3467,27 @@ xfs_iflush_cluster(
struct xfs_mount *mp = ip->i_mount;
struct xfs_perag *pag;
unsigned long first_index, mask;
- unsigned long inodes_per_cluster;
int cilist_size;
struct xfs_inode **cilist;
struct xfs_inode *cip;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
int nr_found;
int clcount = 0;
int i;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
- cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+ cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *);
cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
if (!cilist)
goto out_put;
- mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
+ mask = ~(igeo->inodes_per_cluster - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
rcu_read_lock();
/* really need a gang lookup range call here */
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
- first_index, inodes_per_cluster);
+ first_index, igeo->inodes_per_cluster);
if (nr_found == 0)
goto out_free;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index fa1c4fe2ffbf..c9a502eed204 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -5,6 +5,7 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -12,7 +13,6 @@
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
-#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
@@ -565,7 +565,7 @@ out_unlock:
* Unlock the inode associated with the inode log item.
*/
STATIC void
-xfs_inode_item_unlock(
+xfs_inode_item_release(
struct xfs_log_item *lip)
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
@@ -621,23 +621,21 @@ xfs_inode_item_committed(
STATIC void
xfs_inode_item_committing(
struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+ xfs_lsn_t commit_lsn)
{
- INODE_ITEM(lip)->ili_last_lsn = lsn;
+ INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
+ return xfs_inode_item_release(lip);
}
-/*
- * This is the ops vector shared by all buf log items.
- */
static const struct xfs_item_ops xfs_inode_item_ops = {
.iop_size = xfs_inode_item_size,
.iop_format = xfs_inode_item_format,
.iop_pin = xfs_inode_item_pin,
.iop_unpin = xfs_inode_item_unpin,
- .iop_unlock = xfs_inode_item_unlock,
+ .iop_release = xfs_inode_item_release,
.iop_committed = xfs_inode_item_committed,
.iop_push = xfs_inode_item_push,
- .iop_committing = xfs_inode_item_committing,
+ .iop_committing = xfs_inode_item_committing,
.iop_error = xfs_inode_item_error
};
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 27081eba220c..07a60e74c39c 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -14,7 +14,7 @@ struct xfs_inode;
struct xfs_mount;
typedef struct xfs_inode_log_item {
- xfs_log_item_t ili_item; /* common portion */
+ struct xfs_log_item ili_item; /* common portion */
struct xfs_inode *ili_inode; /* inode ptr */
xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d7dfc13f30f5..6f7848cd5527 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -11,9 +11,8 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
+#include "xfs_iwalk.h"
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_attr.h"
@@ -25,7 +24,6 @@
#include "xfs_export.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_symlink.h"
#include "xfs_trans.h"
#include "xfs_acl.h"
#include "xfs_btree.h"
@@ -36,14 +34,8 @@
#include "xfs_ag.h"
#include "xfs_health.h"
-#include <linux/capability.h>
-#include <linux/cred.h>
-#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
/*
* xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
@@ -721,16 +713,45 @@ out_unlock:
return error;
}
+/* Return 0 on success or positive error */
+int
+xfs_fsbulkstat_one_fmt(
+ struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat)
+{
+ struct xfs_bstat bs1;
+
+ xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
+ if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1)))
+ return -EFAULT;
+ return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat));
+}
+
+int
+xfs_fsinumbers_fmt(
+ struct xfs_ibulk *breq,
+ const struct xfs_inumbers *igrp)
+{
+ struct xfs_inogrp ig1;
+
+ xfs_inumbers_to_inogrp(&ig1, igrp);
+ if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp)))
+ return -EFAULT;
+ return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp));
+}
+
STATIC int
-xfs_ioc_bulkstat(
+xfs_ioc_fsbulkstat(
xfs_mount_t *mp,
unsigned int cmd,
void __user *arg)
{
- xfs_fsop_bulkreq_t bulkreq;
- int count; /* # of records returned */
- xfs_ino_t inlast; /* last inode number */
- int done;
+ struct xfs_fsop_bulkreq bulkreq;
+ struct xfs_ibulk breq = {
+ .mp = mp,
+ .ocount = 0,
+ };
+ xfs_ino_t lastino;
int error;
/* done = 1 if there are more stats to get and if bulkstat */
@@ -742,41 +763,243 @@ xfs_ioc_bulkstat(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+ if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq)))
return -EFAULT;
- if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+ if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
return -EFAULT;
- if ((count = bulkreq.icount) <= 0)
+ if (bulkreq.icount <= 0)
return -EINVAL;
if (bulkreq.ubuffer == NULL)
return -EINVAL;
- if (cmd == XFS_IOC_FSINUMBERS)
- error = xfs_inumbers(mp, &inlast, &count,
- bulkreq.ubuffer, xfs_inumbers_fmt);
- else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
- error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer,
- sizeof(xfs_bstat_t), NULL, &done);
- else /* XFS_IOC_FSBULKSTAT */
- error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
- sizeof(xfs_bstat_t), bulkreq.ubuffer,
- &done);
+ breq.ubuffer = bulkreq.ubuffer;
+ breq.icount = bulkreq.icount;
+
+ /*
+ * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
+ * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect
+ * that *lastip contains either zero or the number of the last inode to
+ * be examined by the previous call and return results starting with
+ * the next inode after that. The new bulk request back end functions
+ * take the inode to start with, so we have to compute the startino
+ * parameter from lastino to maintain correct function. lastino == 0
+ * is a special case because it has traditionally meant "first inode
+ * in filesystem".
+ */
+ if (cmd == XFS_IOC_FSINUMBERS) {
+ breq.startino = lastino ? lastino + 1 : 0;
+ error = xfs_inumbers(&breq, xfs_fsinumbers_fmt);
+ lastino = breq.startino - 1;
+ } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) {
+ breq.startino = lastino;
+ breq.icount = 1;
+ error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt);
+ } else { /* XFS_IOC_FSBULKSTAT */
+ breq.startino = lastino ? lastino + 1 : 0;
+ error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt);
+ lastino = breq.startino - 1;
+ }
if (error)
return error;
- if (bulkreq.ocount != NULL) {
- if (copy_to_user(bulkreq.lastip, &inlast,
- sizeof(xfs_ino_t)))
- return -EFAULT;
+ if (bulkreq.lastip != NULL &&
+ copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
+ return -EFAULT;
- if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -EFAULT;
+ if (bulkreq.ocount != NULL &&
+ copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/* Return 0 on success or positive error */
+static int
+xfs_bulkstat_fmt(
+ struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat)
+{
+ if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat)))
+ return -EFAULT;
+ return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat));
+}
+
+/*
+ * Check the incoming bulk request @hdr from userspace and initialize the
+ * internal @breq bulk request appropriately. Returns 0 if the bulk request
+ * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual
+ * negative error code.
+ */
+static int
+xfs_bulk_ireq_setup(
+ struct xfs_mount *mp,
+ struct xfs_bulk_ireq *hdr,
+ struct xfs_ibulk *breq,
+ void __user *ubuffer)
+{
+ if (hdr->icount == 0 ||
+ (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) ||
+ memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+ return -EINVAL;
+
+ breq->startino = hdr->ino;
+ breq->ubuffer = ubuffer;
+ breq->icount = hdr->icount;
+ breq->ocount = 0;
+ breq->flags = 0;
+
+ /*
+ * The @ino parameter is a special value, so we must look it up here.
+ * We're not allowed to have IREQ_AGNO, and we only return one inode
+ * worth of data.
+ */
+ if (hdr->flags & XFS_BULK_IREQ_SPECIAL) {
+ if (hdr->flags & XFS_BULK_IREQ_AGNO)
+ return -EINVAL;
+
+ switch (hdr->ino) {
+ case XFS_BULK_IREQ_SPECIAL_ROOT:
+ hdr->ino = mp->m_sb.sb_rootino;
+ break;
+ default:
+ return -EINVAL;
+ }
+ breq->icount = 1;
}
+ /*
+ * The IREQ_AGNO flag means that we only want results from a given AG.
+ * If @hdr->ino is zero, we start iterating in that AG. If @hdr->ino is
+ * beyond the specified AG then we return no results.
+ */
+ if (hdr->flags & XFS_BULK_IREQ_AGNO) {
+ if (hdr->agno >= mp->m_sb.sb_agcount)
+ return -EINVAL;
+
+ if (breq->startino == 0)
+ breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0);
+ else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno)
+ return -EINVAL;
+
+ breq->flags |= XFS_IBULK_SAME_AG;
+
+ /* Asking for an inode past the end of the AG? We're done! */
+ if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno)
+ return XFS_ITER_ABORT;
+ } else if (hdr->agno)
+ return -EINVAL;
+
+ /* Asking for an inode past the end of the FS? We're done! */
+ if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount)
+ return XFS_ITER_ABORT;
+
+ return 0;
+}
+
+/*
+ * Update the userspace bulk request @hdr to reflect the end state of the
+ * internal bulk request @breq.
+ */
+static void
+xfs_bulk_ireq_teardown(
+ struct xfs_bulk_ireq *hdr,
+ struct xfs_ibulk *breq)
+{
+ hdr->ino = breq->startino;
+ hdr->ocount = breq->ocount;
+}
+
+/* Handle the v5 bulkstat ioctl. */
+STATIC int
+xfs_ioc_bulkstat(
+ struct xfs_mount *mp,
+ unsigned int cmd,
+ struct xfs_bulkstat_req __user *arg)
+{
+ struct xfs_bulk_ireq hdr;
+ struct xfs_ibulk breq = {
+ .mp = mp,
+ };
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
+ return -EFAULT;
+
+ error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat);
+ if (error == XFS_ITER_ABORT)
+ goto out_teardown;
+ if (error < 0)
+ return error;
+
+ error = xfs_bulkstat(&breq, xfs_bulkstat_fmt);
+ if (error)
+ return error;
+
+out_teardown:
+ xfs_bulk_ireq_teardown(&hdr, &breq);
+ if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+
+ return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt(
+ struct xfs_ibulk *breq,
+ const struct xfs_inumbers *igrp)
+{
+ if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers)))
+ return -EFAULT;
+ return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers));
+}
+
+/* Handle the v5 inumbers ioctl. */
+STATIC int
+xfs_ioc_inumbers(
+ struct xfs_mount *mp,
+ unsigned int cmd,
+ struct xfs_inumbers_req __user *arg)
+{
+ struct xfs_bulk_ireq hdr;
+ struct xfs_ibulk breq = {
+ .mp = mp,
+ };
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
+ return -EFAULT;
+
+ error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers);
+ if (error == XFS_ITER_ABORT)
+ goto out_teardown;
+ if (error < 0)
+ return error;
+
+ error = xfs_inumbers(&breq, xfs_inumbers_fmt);
+ if (error)
+ return error;
+
+out_teardown:
+ xfs_bulk_ireq_teardown(&hdr, &breq);
+ if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+
return 0;
}
@@ -879,37 +1102,44 @@ xfs_di2lxflags(
return flags;
}
-STATIC int
-xfs_ioc_fsgetxattr(
- xfs_inode_t *ip,
- int attr,
- void __user *arg)
+static void
+xfs_fill_fsxattr(
+ struct xfs_inode *ip,
+ bool attr,
+ struct fsxattr *fa)
{
- struct fsxattr fa;
-
- memset(&fa, 0, sizeof(struct fsxattr));
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- fa.fsx_xflags = xfs_ip2xflags(ip);
- fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
- fa.fsx_cowextsize = ip->i_d.di_cowextsize <<
+ simple_fill_fsxattr(fa, xfs_ip2xflags(ip));
+ fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
+ fa->fsx_cowextsize = ip->i_d.di_cowextsize <<
ip->i_mount->m_sb.sb_blocklog;
- fa.fsx_projid = xfs_get_projid(ip);
+ fa->fsx_projid = xfs_get_projid(ip);
if (attr) {
if (ip->i_afp) {
if (ip->i_afp->if_flags & XFS_IFEXTENTS)
- fa.fsx_nextents = xfs_iext_count(ip->i_afp);
+ fa->fsx_nextents = xfs_iext_count(ip->i_afp);
else
- fa.fsx_nextents = ip->i_d.di_anextents;
+ fa->fsx_nextents = ip->i_d.di_anextents;
} else
- fa.fsx_nextents = 0;
+ fa->fsx_nextents = 0;
} else {
if (ip->i_df.if_flags & XFS_IFEXTENTS)
- fa.fsx_nextents = xfs_iext_count(&ip->i_df);
+ fa->fsx_nextents = xfs_iext_count(&ip->i_df);
else
- fa.fsx_nextents = ip->i_d.di_nextents;
+ fa->fsx_nextents = ip->i_d.di_nextents;
}
+}
+
+STATIC int
+xfs_ioc_fsgetxattr(
+ xfs_inode_t *ip,
+ int attr,
+ void __user *arg)
+{
+ struct fsxattr fa;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ xfs_fill_fsxattr(ip, attr, &fa);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (copy_to_user(arg, &fa, sizeof(fa)))
@@ -1035,15 +1265,6 @@ xfs_ioctl_setattr_xflags(
if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip))
return -EINVAL;
- /*
- * Can't modify an immutable/append-only file unless
- * we have appropriate permission.
- */
- if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
- (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
- !capable(CAP_LINUX_IMMUTABLE))
- return -EPERM;
-
/* diflags2 only valid for v3 inodes. */
di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
if (di_flags2 && ip->i_d.di_version < 3)
@@ -1202,39 +1423,31 @@ xfs_ioctl_setattr_check_extsize(
struct fsxattr *fa)
{
struct xfs_mount *mp = ip->i_mount;
-
- if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(VFS_I(ip)->i_mode))
- return -EINVAL;
-
- if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
- !S_ISDIR(VFS_I(ip)->i_mode))
- return -EINVAL;
+ xfs_extlen_t size;
+ xfs_fsblock_t extsize_fsb;
if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents &&
((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
return -EINVAL;
- if (fa->fsx_extsize != 0) {
- xfs_extlen_t size;
- xfs_fsblock_t extsize_fsb;
-
- extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
- if (extsize_fsb > MAXEXTLEN)
- return -EINVAL;
+ if (fa->fsx_extsize == 0)
+ return 0;
- if (XFS_IS_REALTIME_INODE(ip) ||
- (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
- size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
- } else {
- size = mp->m_sb.sb_blocksize;
- if (extsize_fsb > mp->m_sb.sb_agblocks / 2)
- return -EINVAL;
- }
+ extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
+ if (extsize_fsb > MAXEXTLEN)
+ return -EINVAL;
- if (fa->fsx_extsize % size)
+ if (XFS_IS_REALTIME_INODE(ip) ||
+ (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
+ size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
+ } else {
+ size = mp->m_sb.sb_blocksize;
+ if (extsize_fsb > mp->m_sb.sb_agblocks / 2)
return -EINVAL;
- } else
- fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
+ }
+
+ if (fa->fsx_extsize % size)
+ return -EINVAL;
return 0;
}
@@ -1260,6 +1473,8 @@ xfs_ioctl_setattr_check_cowextsize(
struct fsxattr *fa)
{
struct xfs_mount *mp = ip->i_mount;
+ xfs_extlen_t size;
+ xfs_fsblock_t cowextsize_fsb;
if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE))
return 0;
@@ -1268,25 +1483,19 @@ xfs_ioctl_setattr_check_cowextsize(
ip->i_d.di_version != 3)
return -EINVAL;
- if (!S_ISREG(VFS_I(ip)->i_mode) && !S_ISDIR(VFS_I(ip)->i_mode))
- return -EINVAL;
-
- if (fa->fsx_cowextsize != 0) {
- xfs_extlen_t size;
- xfs_fsblock_t cowextsize_fsb;
+ if (fa->fsx_cowextsize == 0)
+ return 0;
- cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
- if (cowextsize_fsb > MAXEXTLEN)
- return -EINVAL;
+ cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
+ if (cowextsize_fsb > MAXEXTLEN)
+ return -EINVAL;
- size = mp->m_sb.sb_blocksize;
- if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
- return -EINVAL;
+ size = mp->m_sb.sb_blocksize;
+ if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
+ return -EINVAL;
- if (fa->fsx_cowextsize % size)
- return -EINVAL;
- } else
- fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
+ if (fa->fsx_cowextsize % size)
+ return -EINVAL;
return 0;
}
@@ -1300,21 +1509,6 @@ xfs_ioctl_setattr_check_projid(
if (fa->fsx_projid > (uint16_t)-1 &&
!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
return -EINVAL;
-
- /*
- * Project Quota ID state is only allowed to change from within the init
- * namespace. Enforce that restriction only if we are trying to change
- * the quota ID state. Everything else is allowed in user namespaces.
- */
- if (current_user_ns() == &init_user_ns)
- return 0;
-
- if (xfs_get_projid(ip) != fa->fsx_projid)
- return -EINVAL;
- if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
- (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
- return -EINVAL;
-
return 0;
}
@@ -1323,6 +1517,7 @@ xfs_ioctl_setattr(
xfs_inode_t *ip,
struct fsxattr *fa)
{
+ struct fsxattr old_fa;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
struct xfs_dquot *udqp = NULL;
@@ -1370,7 +1565,6 @@ xfs_ioctl_setattr(
goto error_free_dquots;
}
-
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) &&
xfs_get_projid(ip) != fa->fsx_projid) {
code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp,
@@ -1379,6 +1573,11 @@ xfs_ioctl_setattr(
goto error_trans_cancel;
}
+ xfs_fill_fsxattr(ip, false, &old_fa);
+ code = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, fa);
+ if (code)
+ goto error_trans_cancel;
+
code = xfs_ioctl_setattr_check_extsize(ip, fa);
if (code)
goto error_trans_cancel;
@@ -1489,6 +1688,7 @@ xfs_ioc_setxflags(
{
struct xfs_trans *tp;
struct fsxattr fa;
+ struct fsxattr old_fa;
unsigned int flags;
int join_flags = 0;
int error;
@@ -1524,6 +1724,13 @@ xfs_ioc_setxflags(
goto out_drop_write;
}
+ xfs_fill_fsxattr(ip, false, &old_fa);
+ error = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, &fa);
+ if (error) {
+ xfs_trans_cancel(tp);
+ goto out_drop_write;
+ }
+
error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
if (error) {
xfs_trans_cancel(tp);
@@ -1942,7 +2149,12 @@ xfs_file_ioctl(
case XFS_IOC_FSBULKSTAT_SINGLE:
case XFS_IOC_FSBULKSTAT:
case XFS_IOC_FSINUMBERS:
+ return xfs_ioc_fsbulkstat(mp, cmd, arg);
+
+ case XFS_IOC_BULKSTAT:
return xfs_ioc_bulkstat(mp, cmd, arg);
+ case XFS_IOC_INUMBERS:
+ return xfs_ioc_inumbers(mp, cmd, arg);
case XFS_IOC_FSGEOMETRY_V1:
return xfs_ioc_fsgeometry(mp, arg, 3);
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 4b17f67c888a..654c0bb1bcf8 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -77,4 +77,12 @@ xfs_set_dmattrs(
uint evmask,
uint16_t state);
+struct xfs_ibulk;
+struct xfs_bstat;
+struct xfs_inogrp;
+
+int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat);
+int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp);
+
#endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 614fc6886d24..7fcf7569743f 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -3,23 +3,19 @@
* Copyright (c) 2004-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/compat.h>
-#include <linux/ioctl.h>
#include <linux/mount.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
#include <linux/fsmap.h>
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
+#include "xfs_iwalk.h"
#include "xfs_itable.h"
-#include "xfs_error.h"
#include "xfs_fsops.h"
-#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_attr.h"
#include "xfs_ioctl.h"
@@ -84,27 +80,26 @@ xfs_compat_growfs_rt_copyin(
}
STATIC int
-xfs_inumbers_fmt_compat(
- void __user *ubuffer,
- const struct xfs_inogrp *buffer,
- long count,
- long *written)
+xfs_fsinumbers_fmt_compat(
+ struct xfs_ibulk *breq,
+ const struct xfs_inumbers *ig)
{
- compat_xfs_inogrp_t __user *p32 = ubuffer;
- long i;
+ struct compat_xfs_inogrp __user *p32 = breq->ubuffer;
+ struct xfs_inogrp ig1;
+ struct xfs_inogrp *igrp = &ig1;
- for (i = 0; i < count; i++) {
- if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
- put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
- put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
- return -EFAULT;
- }
- *written = count * sizeof(*p32);
- return 0;
+ xfs_inumbers_to_inogrp(&ig1, ig);
+
+ if (put_user(igrp->xi_startino, &p32->xi_startino) ||
+ put_user(igrp->xi_alloccount, &p32->xi_alloccount) ||
+ put_user(igrp->xi_allocmask, &p32->xi_allocmask))
+ return -EFAULT;
+
+ return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp));
}
#else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt
#endif /* BROKEN_X86_ALIGNMENT */
STATIC int
@@ -121,11 +116,14 @@ xfs_ioctl32_bstime_copyin(
return 0;
}
-/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
+/*
+ * struct xfs_bstat has differing alignment on intel, & bstime_t sizes
+ * everywhere
+ */
STATIC int
xfs_ioctl32_bstat_copyin(
- xfs_bstat_t *bstat,
- compat_xfs_bstat_t __user *bstat32)
+ struct xfs_bstat *bstat,
+ struct compat_xfs_bstat __user *bstat32)
{
if (get_user(bstat->bs_ino, &bstat32->bs_ino) ||
get_user(bstat->bs_mode, &bstat32->bs_mode) ||
@@ -171,16 +169,15 @@ xfs_bstime_store_compat(
/* Return 0 on success or positive error (to xfs_bulkstat()) */
STATIC int
-xfs_bulkstat_one_fmt_compat(
- void __user *ubuffer,
- int ubsize,
- int *ubused,
- const xfs_bstat_t *buffer)
+xfs_fsbulkstat_one_fmt_compat(
+ struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat)
{
- compat_xfs_bstat_t __user *p32 = ubuffer;
+ struct compat_xfs_bstat __user *p32 = breq->ubuffer;
+ struct xfs_bstat bs1;
+ struct xfs_bstat *buffer = &bs1;
- if (ubsize < sizeof(*p32))
- return -ENOMEM;
+ xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
if (put_user(buffer->bs_ino, &p32->bs_ino) ||
put_user(buffer->bs_mode, &p32->bs_mode) ||
@@ -205,37 +202,24 @@ xfs_bulkstat_one_fmt_compat(
put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
put_user(buffer->bs_aextents, &p32->bs_aextents))
return -EFAULT;
- if (ubused)
- *ubused = sizeof(*p32);
- return 0;
-}
-STATIC int
-xfs_bulkstat_one_compat(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
-{
- return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt_compat,
- ubused, stat);
+ return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat));
}
/* copied from xfs_ioctl.c */
STATIC int
-xfs_compat_ioc_bulkstat(
+xfs_compat_ioc_fsbulkstat(
xfs_mount_t *mp,
unsigned int cmd,
- compat_xfs_fsop_bulkreq_t __user *p32)
+ struct compat_xfs_fsop_bulkreq __user *p32)
{
u32 addr;
- xfs_fsop_bulkreq_t bulkreq;
- int count; /* # of records returned */
- xfs_ino_t inlast; /* last inode number */
- int done;
+ struct xfs_fsop_bulkreq bulkreq;
+ struct xfs_ibulk breq = {
+ .mp = mp,
+ .ocount = 0,
+ };
+ xfs_ino_t lastino;
int error;
/*
@@ -244,9 +228,8 @@ xfs_compat_ioc_bulkstat(
* to userpace memory via bulkreq.ubuffer. Normally the compat
* functions and structure size are the correct ones to use ...
*/
- inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat;
- bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat;
- size_t bs_one_size = sizeof(struct compat_xfs_bstat);
+ inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat;
+ bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat;
#ifdef CONFIG_X86_X32
if (in_x32_syscall()) {
@@ -258,9 +241,8 @@ xfs_compat_ioc_bulkstat(
* the data written out in compat layout will not match what
* x32 userspace expects.
*/
- inumbers_func = xfs_inumbers_fmt;
- bs_one_func = xfs_bulkstat_one;
- bs_one_size = sizeof(struct xfs_bstat);
+ inumbers_func = xfs_fsinumbers_fmt;
+ bs_one_func = xfs_fsbulkstat_one_fmt;
}
#endif
@@ -284,40 +266,55 @@ xfs_compat_ioc_bulkstat(
return -EFAULT;
bulkreq.ocount = compat_ptr(addr);
- if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+ if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
return -EFAULT;
- if ((count = bulkreq.icount) <= 0)
+ if (bulkreq.icount <= 0)
return -EINVAL;
if (bulkreq.ubuffer == NULL)
return -EINVAL;
+ breq.ubuffer = bulkreq.ubuffer;
+ breq.icount = bulkreq.icount;
+
+ /*
+ * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
+ * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect
+ * that *lastip contains either zero or the number of the last inode to
+ * be examined by the previous call and return results starting with
+ * the next inode after that. The new bulk request back end functions
+ * take the inode to start with, so we have to compute the startino
+ * parameter from lastino to maintain correct function. lastino == 0
+ * is a special case because it has traditionally meant "first inode
+ * in filesystem".
+ */
if (cmd == XFS_IOC_FSINUMBERS_32) {
- error = xfs_inumbers(mp, &inlast, &count,
- bulkreq.ubuffer, inumbers_func);
+ breq.startino = lastino ? lastino + 1 : 0;
+ error = xfs_inumbers(&breq, inumbers_func);
+ lastino = breq.startino - 1;
} else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
- int res;
-
- error = bs_one_func(mp, inlast, bulkreq.ubuffer,
- bs_one_size, NULL, &res);
+ breq.startino = lastino;
+ breq.icount = 1;
+ error = xfs_bulkstat_one(&breq, bs_one_func);
+ lastino = breq.startino;
} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
- error = xfs_bulkstat(mp, &inlast, &count,
- bs_one_func, bs_one_size,
- bulkreq.ubuffer, &done);
- } else
+ breq.startino = lastino ? lastino + 1 : 0;
+ error = xfs_bulkstat(&breq, bs_one_func);
+ lastino = breq.startino - 1;
+ } else {
error = -EINVAL;
+ }
if (error)
return error;
- if (bulkreq.ocount != NULL) {
- if (copy_to_user(bulkreq.lastip, &inlast,
- sizeof(xfs_ino_t)))
- return -EFAULT;
+ if (bulkreq.lastip != NULL &&
+ copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
+ return -EFAULT;
- if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -EFAULT;
- }
+ if (bulkreq.ocount != NULL &&
+ copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
+ return -EFAULT;
return 0;
}
@@ -577,6 +574,8 @@ xfs_file_compat_ioctl(
case XFS_IOC_ERROR_CLEARALL:
case FS_IOC_GETFSMAP:
case XFS_IOC_SCRUB_METADATA:
+ case XFS_IOC_BULKSTAT:
+ case XFS_IOC_INUMBERS:
return xfs_file_ioctl(filp, cmd, p);
#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32)
/*
@@ -674,7 +673,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSBULKSTAT_32:
case XFS_IOC_FSBULKSTAT_SINGLE_32:
case XFS_IOC_FSINUMBERS_32:
- return xfs_compat_ioc_bulkstat(mp, cmd, arg);
+ return xfs_compat_ioc_fsbulkstat(mp, cmd, arg);
case XFS_IOC_FD_TO_HANDLE_32:
case XFS_IOC_PATH_TO_HANDLE_32:
case XFS_IOC_PATH_TO_FSHANDLE_32: {
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index d28fa824284a..7985344d3aa6 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -36,7 +36,7 @@ typedef struct compat_xfs_bstime {
__s32 tv_nsec; /* and nanoseconds */
} compat_xfs_bstime_t;
-typedef struct compat_xfs_bstat {
+struct compat_xfs_bstat {
__u64 bs_ino; /* inode number */
__u16 bs_mode; /* type and mode */
__u16 bs_nlink; /* number of links */
@@ -61,14 +61,14 @@ typedef struct compat_xfs_bstat {
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
-} __compat_packed compat_xfs_bstat_t;
+} __compat_packed;
-typedef struct compat_xfs_fsop_bulkreq {
+struct compat_xfs_fsop_bulkreq {
compat_uptr_t lastip; /* last inode # pointer */
__s32 icount; /* count of entries in buffer */
compat_uptr_t ubuffer; /* user buffer for inode desc. */
compat_uptr_t ocount; /* output count pointer */
-} compat_xfs_fsop_bulkreq_t;
+};
#define XFS_IOC_FSBULKSTAT_32 \
_IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
@@ -106,7 +106,7 @@ typedef struct compat_xfs_swapext {
xfs_off_t sx_offset; /* offset into file */
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
- compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */
+ struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */
} __compat_packed compat_xfs_swapext_t;
#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext)
@@ -201,11 +201,11 @@ typedef struct compat_xfs_fsop_geom_v1 {
#define XFS_IOC_FSGEOMETRY_V1_32 \
_IOR('X', 100, struct compat_xfs_fsop_geom_v1)
-typedef struct compat_xfs_inogrp {
+struct compat_xfs_inogrp {
__u64 xi_startino; /* starting inode number */
__s32 xi_alloccount; /* # bits set in allocmask */
__u64 xi_allocmask; /* mask of allocated inodes */
-} __attribute__((packed)) compat_xfs_inogrp_t;
+} __attribute__((packed));
/* These growfs input structures have padding on the end, so must translate */
typedef struct compat_xfs_growfs_data {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 63d323916bba..3a4310d7cb59 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -4,7 +4,6 @@
* Copyright (c) 2016-2018 Christoph Hellwig.
* All Rights Reserved.
*/
-#include <linux/iomap.h>
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
@@ -12,7 +11,6 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
@@ -25,7 +23,6 @@
#include "xfs_inode_item.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
-#include "xfs_icache.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
@@ -779,7 +776,7 @@ xfs_iomap_write_unwritten(
* complete here and might deadlock on the iolock.
*/
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
- XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+ XFS_TRANS_RESERVE, &tp);
if (error)
return error;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 74047bd0c1ae..ff3c1fae5357 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -10,30 +10,20 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_acl.h"
#include "xfs_quota.h"
-#include "xfs_error.h"
#include "xfs_attr.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
-#include "xfs_trans_space.h"
#include "xfs_iomap.h"
-#include "xfs_defer.h"
-#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
-#include <linux/iomap.h>
-#include <linux/slab.h>
#include <linux/iversion.h>
/*
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 1e1a0af1dd34..a8a06bb78ea8 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -14,46 +14,66 @@
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
#include "xfs_itable.h"
#include "xfs_error.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_health.h"
/*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
+ * Bulk Stat
+ * =========
+ *
+ * Use the inode walking functions to fill out struct xfs_bulkstat for every
+ * allocated inode, then pass the stat information to some externally provided
+ * iteration function.
*/
-int
+
+struct xfs_bstat_chunk {
+ bulkstat_one_fmt_pf formatter;
+ struct xfs_ibulk *breq;
+ struct xfs_bulkstat *buf;
+};
+
+/*
+ * Fill out the bulkstat info for a single inode and report it somewhere.
+ *
+ * bc->breq->lastino is effectively the inode cursor as we walk through the
+ * filesystem. Therefore, we update it any time we need to move the cursor
+ * forward, regardless of whether or not we're sending any bstat information
+ * back to userspace. If the inode is internal metadata or, has been freed
+ * out from under us, we just simply keep going.
+ *
+ * However, if any other type of error happens we want to stop right where we
+ * are so that userspace will call back with exact number of the bad inode and
+ * we can send back an error code.
+ *
+ * Note that if the formatter tells us there's no space left in the buffer we
+ * move the cursor forward and abort the walk.
+ */
+STATIC int
xfs_bulkstat_one_int(
- struct xfs_mount *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ struct xfs_bstat_chunk *bc)
{
struct xfs_icdinode *dic; /* dinode core info pointer */
struct xfs_inode *ip; /* incore inode pointer */
struct inode *inode;
- struct xfs_bstat *buf; /* return buffer */
- int error = 0; /* error value */
+ struct xfs_bulkstat *buf = bc->buf;
+ int error = -EINVAL;
- *stat = BULKSTAT_RV_NOTHING;
+ if (xfs_internal_inum(mp, ino))
+ goto out_advance;
- if (!buffer || xfs_internal_inum(mp, ino))
- return -EINVAL;
-
- buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
- if (!buf)
- return -ENOMEM;
-
- error = xfs_iget(mp, NULL, ino,
+ error = xfs_iget(mp, tp, ino,
(XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
XFS_ILOCK_SHARED, &ip);
+ if (error == -ENOENT || error == -EINVAL)
+ goto out_advance;
if (error)
- goto out_free;
+ goto out;
ASSERT(ip != NULL);
ASSERT(ip->i_imap.im_blkno != 0);
@@ -64,37 +84,35 @@ xfs_bulkstat_one_int(
/* xfs_iget returns the following without needing
* further change.
*/
- buf->bs_projid_lo = dic->di_projid_lo;
- buf->bs_projid_hi = dic->di_projid_hi;
+ buf->bs_projectid = xfs_get_projid(ip);
buf->bs_ino = ino;
buf->bs_uid = dic->di_uid;
buf->bs_gid = dic->di_gid;
buf->bs_size = dic->di_size;
buf->bs_nlink = inode->i_nlink;
- buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
- buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
- buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
- buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
- buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
- buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
+ buf->bs_atime = inode->i_atime.tv_sec;
+ buf->bs_atime_nsec = inode->i_atime.tv_nsec;
+ buf->bs_mtime = inode->i_mtime.tv_sec;
+ buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
+ buf->bs_ctime = inode->i_ctime.tv_sec;
+ buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
+ buf->bs_btime = dic->di_crtime.t_sec;
+ buf->bs_btime_nsec = dic->di_crtime.t_nsec;
buf->bs_gen = inode->i_generation;
buf->bs_mode = inode->i_mode;
buf->bs_xflags = xfs_ip2xflags(ip);
- buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
+ buf->bs_extsize_blks = dic->di_extsize;
buf->bs_extents = dic->di_nextents;
- memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
xfs_bulkstat_health(ip, buf);
- buf->bs_dmevmask = dic->di_dmevmask;
- buf->bs_dmstate = dic->di_dmstate;
buf->bs_aextents = dic->di_anextents;
buf->bs_forkoff = XFS_IFORK_BOFF(ip);
+ buf->bs_version = XFS_BULKSTAT_VERSION_V5;
if (dic->di_version == 3) {
if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
- buf->bs_cowextsize = dic->di_cowextsize <<
- mp->m_sb.sb_blocklog;
+ buf->bs_cowextsize_blks = dic->di_cowextsize;
}
switch (dic->di_format) {
@@ -118,385 +136,121 @@ xfs_bulkstat_one_int(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
xfs_irele(ip);
- error = formatter(buffer, ubsize, ubused, buf);
- if (!error)
- *stat = BULKSTAT_RV_DIDONE;
+ error = bc->formatter(bc->breq, buf);
+ if (error == XFS_IBULK_ABORT)
+ goto out_advance;
+ if (error)
+ goto out;
- out_free:
- kmem_free(buf);
+out_advance:
+ /*
+ * Advance the cursor to the inode that comes after the one we just
+ * looked at. We want the caller to move along if the bulkstat
+ * information was copied successfully; if we tried to grab the inode
+ * but it's no longer allocated; or if it's internal metadata.
+ */
+ bc->breq->startino = ino + 1;
+out:
return error;
}
-/* Return 0 on success or positive error */
-STATIC int
-xfs_bulkstat_one_fmt(
- void __user *ubuffer,
- int ubsize,
- int *ubused,
- const xfs_bstat_t *buffer)
-{
- if (ubsize < sizeof(*buffer))
- return -ENOMEM;
- if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
- return -EFAULT;
- if (ubused)
- *ubused = sizeof(*buffer);
- return 0;
-}
-
+/* Bulkstat a single inode. */
int
xfs_bulkstat_one(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
+ struct xfs_ibulk *breq,
+ bulkstat_one_fmt_pf formatter)
{
- return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt, ubused, stat);
-}
+ struct xfs_bstat_chunk bc = {
+ .formatter = formatter,
+ .breq = breq,
+ };
+ int error;
-/*
- * Loop over all clusters in a chunk for a given incore inode allocation btree
- * record. Do a readahead if there are any allocated inodes in that cluster.
- */
-STATIC void
-xfs_bulkstat_ichunk_ra(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- struct xfs_inobt_rec_incore *irec)
-{
- xfs_agblock_t agbno;
- struct blk_plug plug;
- int i; /* inode chunk index */
-
- agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
-
- blk_start_plug(&plug);
- for (i = 0; i < XFS_INODES_PER_CHUNK;
- i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) {
- if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) &
- ~irec->ir_free) {
- xfs_btree_reada_bufs(mp, agno, agbno,
- mp->m_blocks_per_cluster,
- &xfs_inode_buf_ops);
- }
- }
- blk_finish_plug(&plug);
-}
+ ASSERT(breq->icount == 1);
-/*
- * Lookup the inode chunk that the given inode lives in and then get the record
- * if we found the chunk. If the inode was not the last in the chunk and there
- * are some left allocated, update the data for the pointed-to record as well as
- * return the count of grabbed inodes.
- */
-STATIC int
-xfs_bulkstat_grab_ichunk(
- struct xfs_btree_cur *cur, /* btree cursor */
- xfs_agino_t agino, /* starting inode of chunk */
- int *icount,/* return # of inodes grabbed */
- struct xfs_inobt_rec_incore *irec) /* btree record */
-{
- int idx; /* index into inode chunk */
- int stat;
- int error = 0;
-
- /* Lookup the inode chunk that this inode lives in */
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat);
- if (error)
- return error;
- if (!stat) {
- *icount = 0;
- return error;
- }
+ bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
+ KM_SLEEP | KM_MAYFAIL);
+ if (!bc.buf)
+ return -ENOMEM;
- /* Get the record, should always work */
- error = xfs_inobt_get_rec(cur, irec, &stat);
- if (error)
- return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
+ error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc);
- /* Check if the record contains the inode in request */
- if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
- *icount = 0;
- return 0;
- }
+ kmem_free(bc.buf);
- idx = agino - irec->ir_startino + 1;
- if (idx < XFS_INODES_PER_CHUNK &&
- (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) {
- int i;
-
- /* We got a right chunk with some left inodes allocated at it.
- * Grab the chunk record. Mark all the uninteresting inodes
- * free -- because they're before our start point.
- */
- for (i = 0; i < idx; i++) {
- if (XFS_INOBT_MASK(i) & ~irec->ir_free)
- irec->ir_freecount++;
- }
-
- irec->ir_free |= xfs_inobt_maskn(0, idx);
- *icount = irec->ir_count - irec->ir_freecount;
- }
+ /*
+ * If we reported one inode to userspace then we abort because we hit
+ * the end of the buffer. Don't leak that back to userspace.
+ */
+ if (error == XFS_IWALK_ABORT)
+ error = 0;
- return 0;
+ return error;
}
-#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
-
-struct xfs_bulkstat_agichunk {
- char __user **ac_ubuffer;/* pointer into user's buffer */
- int ac_ubleft; /* bytes left in user's buffer */
- int ac_ubelem; /* spaces used in user's buffer */
-};
-
-/*
- * Process inodes in chunk with a pointer to a formatter function
- * that will iget the inode and fill in the appropriate structure.
- */
static int
-xfs_bulkstat_ag_ichunk(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- struct xfs_inobt_rec_incore *irbp,
- bulkstat_one_pf formatter,
- size_t statstruct_size,
- struct xfs_bulkstat_agichunk *acp,
- xfs_agino_t *last_agino)
+xfs_bulkstat_iwalk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ void *data)
{
- char __user **ubufp = acp->ac_ubuffer;
- int chunkidx;
- int error = 0;
- xfs_agino_t agino = irbp->ir_startino;
-
- for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
- chunkidx++, agino++) {
- int fmterror;
- int ubused;
-
- /* inode won't fit in buffer, we are done */
- if (acp->ac_ubleft < statstruct_size)
- break;
-
- /* Skip if this inode is free */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
- continue;
-
- /* Get the inode and fill in a single buffer */
- ubused = statstruct_size;
- error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
- *ubufp, acp->ac_ubleft, &ubused, &fmterror);
-
- if (fmterror == BULKSTAT_RV_GIVEUP ||
- (error && error != -ENOENT && error != -EINVAL)) {
- acp->ac_ubleft = 0;
- ASSERT(error);
- break;
- }
-
- /* be careful not to leak error if at end of chunk */
- if (fmterror == BULKSTAT_RV_NOTHING || error) {
- error = 0;
- continue;
- }
-
- *ubufp += ubused;
- acp->ac_ubleft -= ubused;
- acp->ac_ubelem++;
- }
-
- /*
- * Post-update *last_agino. At this point, agino will always point one
- * inode past the last inode we processed successfully. Hence we
- * substract that inode when setting the *last_agino cursor so that we
- * return the correct cookie to userspace. On the next bulkstat call,
- * the inode under the lastino cookie will be skipped as we have already
- * processed it here.
- */
- *last_agino = agino - 1;
+ int error;
+ error = xfs_bulkstat_one_int(mp, tp, ino, data);
+ /* bulkstat just skips over missing inodes */
+ if (error == -ENOENT || error == -EINVAL)
+ return 0;
return error;
}
/*
- * Return stat information in bulk (by-inode) for the filesystem.
+ * Check the incoming lastino parameter.
+ *
+ * We allow any inode value that could map to physical space inside the
+ * filesystem because if there are no inodes there, bulkstat moves on to the
+ * next chunk. In other words, the magic agino value of zero takes us to the
+ * first chunk in the AG, and an agino value past the end of the AG takes us to
+ * the first chunk in the next AG.
+ *
+ * Therefore we can end early if the requested inode is beyond the end of the
+ * filesystem or doesn't map properly.
*/
-int /* error status */
-xfs_bulkstat(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastinop, /* last inode returned */
- int *ubcountp, /* size of buffer/count returned */
- bulkstat_one_pf formatter, /* func that'd fill a single buf */
- size_t statstruct_size, /* sizeof struct filling */
- char __user *ubuffer, /* buffer with inode stats */
- int *done) /* 1 if there are more stats to get */
+static inline bool
+xfs_bulkstat_already_done(
+ struct xfs_mount *mp,
+ xfs_ino_t startino)
{
- xfs_buf_t *agbp; /* agi header buffer */
- xfs_agino_t agino; /* inode # in allocation group */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
- xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
- int nirbuf; /* size of irbuf */
- int ubcount; /* size of user's buffer */
- struct xfs_bulkstat_agichunk ac;
- int error = 0;
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino);
- /*
- * Get the last inode value, see if there's nothing to do.
- */
- agno = XFS_INO_TO_AGNO(mp, *lastinop);
- agino = XFS_INO_TO_AGINO(mp, *lastinop);
- if (agno >= mp->m_sb.sb_agcount ||
- *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
- *done = 1;
- *ubcountp = 0;
- return 0;
- }
+ return agno >= mp->m_sb.sb_agcount ||
+ startino != XFS_AGINO_TO_INO(mp, agno, agino);
+}
- ubcount = *ubcountp; /* statstruct's */
- ac.ac_ubuffer = &ubuffer;
- ac.ac_ubleft = ubcount * statstruct_size; /* bytes */;
- ac.ac_ubelem = 0;
+/* Return stat information in bulk (by-inode) for the filesystem. */
+int
+xfs_bulkstat(
+ struct xfs_ibulk *breq,
+ bulkstat_one_fmt_pf formatter)
+{
+ struct xfs_bstat_chunk bc = {
+ .formatter = formatter,
+ .breq = breq,
+ };
+ int error;
- *ubcountp = 0;
- *done = 0;
+ if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+ return 0;
- irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
- if (!irbuf)
+ bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
+ KM_SLEEP | KM_MAYFAIL);
+ if (!bc.buf)
return -ENOMEM;
- nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
- /*
- * Loop over the allocation groups, starting from the last
- * inode returned; 0 means start of the allocation group.
- */
- while (agno < mp->m_sb.sb_agcount) {
- struct xfs_inobt_rec_incore *irbp = irbuf;
- struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
- bool end_of_ag = false;
- int icount = 0;
- int stat;
-
- error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error)
- break;
- /*
- * Allocate and initialize a btree cursor for ialloc btree.
- */
- cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
- XFS_BTNUM_INO);
- if (agino > 0) {
- /*
- * In the middle of an allocation group, we need to get
- * the remainder of the chunk we're in.
- */
- struct xfs_inobt_rec_incore r;
-
- error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
- if (error)
- goto del_cursor;
- if (icount) {
- irbp->ir_startino = r.ir_startino;
- irbp->ir_holemask = r.ir_holemask;
- irbp->ir_count = r.ir_count;
- irbp->ir_freecount = r.ir_freecount;
- irbp->ir_free = r.ir_free;
- irbp++;
- }
- /* Increment to the next record */
- error = xfs_btree_increment(cur, 0, &stat);
- } else {
- /* Start of ag. Lookup the first inode chunk */
- error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
- }
- if (error || stat == 0) {
- end_of_ag = true;
- goto del_cursor;
- }
-
- /*
- * Loop through inode btree records in this ag,
- * until we run out of inodes or space in the buffer.
- */
- while (irbp < irbufend && icount < ubcount) {
- struct xfs_inobt_rec_incore r;
-
- error = xfs_inobt_get_rec(cur, &r, &stat);
- if (error || stat == 0) {
- end_of_ag = true;
- goto del_cursor;
- }
-
- /*
- * If this chunk has any allocated inodes, save it.
- * Also start read-ahead now for this chunk.
- */
- if (r.ir_freecount < r.ir_count) {
- xfs_bulkstat_ichunk_ra(mp, agno, &r);
- irbp->ir_startino = r.ir_startino;
- irbp->ir_holemask = r.ir_holemask;
- irbp->ir_count = r.ir_count;
- irbp->ir_freecount = r.ir_freecount;
- irbp->ir_free = r.ir_free;
- irbp++;
- icount += r.ir_count - r.ir_freecount;
- }
- error = xfs_btree_increment(cur, 0, &stat);
- if (error || stat == 0) {
- end_of_ag = true;
- goto del_cursor;
- }
- cond_resched();
- }
-
- /*
- * Drop the btree buffers and the agi buffer as we can't hold any
- * of the locks these represent when calling iget. If there is a
- * pending error, then we are done.
- */
-del_cursor:
- xfs_btree_del_cursor(cur, error);
- xfs_buf_relse(agbp);
- if (error)
- break;
- /*
- * Now format all the good inodes into the user's buffer. The
- * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
- * for the next loop iteration.
- */
- irbufend = irbp;
- for (irbp = irbuf;
- irbp < irbufend && ac.ac_ubleft >= statstruct_size;
- irbp++) {
- error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
- formatter, statstruct_size, &ac,
- &agino);
- if (error)
- break;
-
- cond_resched();
- }
-
- /*
- * If we've run out of space or had a formatting error, we
- * are now done
- */
- if (ac.ac_ubleft < statstruct_size || error)
- break;
-
- if (end_of_ag) {
- agno++;
- agino = 0;
- }
- }
- /*
- * Done, we're either out of filesystem or space to put the data.
- */
- kmem_free(irbuf);
- *ubcountp = ac.ac_ubelem;
+ error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags,
+ xfs_bulkstat_iwalk, breq->icount, &bc);
+
+ kmem_free(bc.buf);
/*
* We found some inodes, so clear the error status and return them.
@@ -505,135 +259,136 @@ del_cursor:
* triggered again and propagated to userspace as there will be no
* formatted inodes in the buffer.
*/
- if (ac.ac_ubelem)
+ if (breq->ocount > 0)
error = 0;
- /*
- * If we ran out of filesystem, lastino will point off the end of
- * the filesystem so the next call will return immediately.
- */
- *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
- if (agno >= mp->m_sb.sb_agcount)
- *done = 1;
-
return error;
}
-int
-xfs_inumbers_fmt(
- void __user *ubuffer, /* buffer to write to */
- const struct xfs_inogrp *buffer, /* buffer to read from */
- long count, /* # of elements to read */
- long *written) /* # of bytes written */
+/* Convert bulkstat (v5) to bstat (v1). */
+void
+xfs_bulkstat_to_bstat(
+ struct xfs_mount *mp,
+ struct xfs_bstat *bs1,
+ const struct xfs_bulkstat *bstat)
{
- if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
- return -EFAULT;
- *written = count * sizeof(*buffer);
- return 0;
+ memset(bs1, 0, sizeof(struct xfs_bstat));
+ bs1->bs_ino = bstat->bs_ino;
+ bs1->bs_mode = bstat->bs_mode;
+ bs1->bs_nlink = bstat->bs_nlink;
+ bs1->bs_uid = bstat->bs_uid;
+ bs1->bs_gid = bstat->bs_gid;
+ bs1->bs_rdev = bstat->bs_rdev;
+ bs1->bs_blksize = bstat->bs_blksize;
+ bs1->bs_size = bstat->bs_size;
+ bs1->bs_atime.tv_sec = bstat->bs_atime;
+ bs1->bs_mtime.tv_sec = bstat->bs_mtime;
+ bs1->bs_ctime.tv_sec = bstat->bs_ctime;
+ bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec;
+ bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec;
+ bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec;
+ bs1->bs_blocks = bstat->bs_blocks;
+ bs1->bs_xflags = bstat->bs_xflags;
+ bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks);
+ bs1->bs_extents = bstat->bs_extents;
+ bs1->bs_gen = bstat->bs_gen;
+ bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF;
+ bs1->bs_forkoff = bstat->bs_forkoff;
+ bs1->bs_projid_hi = bstat->bs_projectid >> 16;
+ bs1->bs_sick = bstat->bs_sick;
+ bs1->bs_checked = bstat->bs_checked;
+ bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks);
+ bs1->bs_dmevmask = 0;
+ bs1->bs_dmstate = 0;
+ bs1->bs_aextents = bstat->bs_aextents;
+}
+
+struct xfs_inumbers_chunk {
+ inumbers_fmt_pf formatter;
+ struct xfs_ibulk *breq;
+};
+
+/*
+ * INUMBERS
+ * ========
+ * This is how we export inode btree records to userspace, so that XFS tools
+ * can figure out where inodes are allocated.
+ */
+
+/*
+ * Format the inode group structure and report it somewhere.
+ *
+ * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk
+ * through the filesystem so we move it forward unless there was a runtime
+ * error. If the formatter tells us the buffer is now full we also move the
+ * cursor forward and abort the walk.
+ */
+STATIC int
+xfs_inumbers_walk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ const struct xfs_inobt_rec_incore *irec,
+ void *data)
+{
+ struct xfs_inumbers inogrp = {
+ .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino),
+ .xi_alloccount = irec->ir_count - irec->ir_freecount,
+ .xi_allocmask = ~irec->ir_free,
+ .xi_version = XFS_INUMBERS_VERSION_V5,
+ };
+ struct xfs_inumbers_chunk *ic = data;
+ int error;
+
+ error = ic->formatter(ic->breq, &inogrp);
+ if (error && error != XFS_IBULK_ABORT)
+ return error;
+
+ ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
+ XFS_INODES_PER_CHUNK;
+ return error;
}
/*
* Return inode number table for the filesystem.
*/
-int /* error status */
+int
xfs_inumbers(
- struct xfs_mount *mp,/* mount point for filesystem */
- xfs_ino_t *lastino,/* last inode returned */
- int *count,/* size of buffer/count returned */
- void __user *ubuffer,/* buffer with inode descriptions */
+ struct xfs_ibulk *breq,
inumbers_fmt_pf formatter)
{
- xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino);
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino);
- struct xfs_btree_cur *cur = NULL;
- struct xfs_buf *agbp = NULL;
- struct xfs_inogrp *buffer;
- int bcount;
- int left = *count;
- int bufidx = 0;
+ struct xfs_inumbers_chunk ic = {
+ .formatter = formatter,
+ .breq = breq,
+ };
int error = 0;
- *count = 0;
- if (agno >= mp->m_sb.sb_agcount ||
- *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
- return error;
+ if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+ return 0;
- bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer)));
- buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
- do {
- struct xfs_inobt_rec_incore r;
- int stat;
-
- if (!agbp) {
- error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error)
- break;
-
- cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
- XFS_BTNUM_INO);
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
- &stat);
- if (error)
- break;
- if (!stat)
- goto next_ag;
- }
-
- error = xfs_inobt_get_rec(cur, &r, &stat);
- if (error)
- break;
- if (!stat)
- goto next_ag;
-
- agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
- buffer[bufidx].xi_startino =
- XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
- buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
- buffer[bufidx].xi_allocmask = ~r.ir_free;
- if (++bufidx == bcount) {
- long written;
-
- error = formatter(ubuffer, buffer, bufidx, &written);
- if (error)
- break;
- ubuffer += written;
- *count += bufidx;
- bufidx = 0;
- }
- if (!--left)
- break;
-
- error = xfs_btree_increment(cur, 0, &stat);
- if (error)
- break;
- if (stat)
- continue;
-
-next_ag:
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- cur = NULL;
- xfs_buf_relse(agbp);
- agbp = NULL;
- agino = 0;
- agno++;
- } while (agno < mp->m_sb.sb_agcount);
-
- if (!error) {
- if (bufidx) {
- long written;
-
- error = formatter(ubuffer, buffer, bufidx, &written);
- if (!error)
- *count += bufidx;
- }
- *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
- }
+ error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags,
+ xfs_inumbers_walk, breq->icount, &ic);
- kmem_free(buffer);
- if (cur)
- xfs_btree_del_cursor(cur, error);
- if (agbp)
- xfs_buf_relse(agbp);
+ /*
+ * We found some inode groups, so clear the error status and return
+ * them. The lastino pointer will point directly at the inode that
+ * triggered any error that occurred, so on the next call the error
+ * will be triggered again and propagated to userspace as there will be
+ * no formatted inode groups in the buffer.
+ */
+ if (breq->ocount > 0)
+ error = 0;
return error;
}
+
+/* Convert an inumbers (v5) struct to a inogrp (v1) struct. */
+void
+xfs_inumbers_to_inogrp(
+ struct xfs_inogrp *ig1,
+ const struct xfs_inumbers *ig)
+{
+ ig1->xi_startino = ig->xi_startino;
+ ig1->xi_alloccount = ig->xi_alloccount;
+ ig1->xi_allocmask = ig->xi_allocmask;
+}
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 8a822285b671..e90c1fc5b981 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -5,83 +5,55 @@
#ifndef __XFS_ITABLE_H__
#define __XFS_ITABLE_H__
-/*
- * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat
- * structures (by the dmi library). This is a pointer to a formatter function
- * that will iget the inode and fill in the appropriate structure.
- * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c
- */
-typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
- xfs_ino_t ino,
- void __user *buffer,
- int ubsize,
- int *ubused,
- int *stat);
+/* In-memory representation of a userspace request for batch inode data. */
+struct xfs_ibulk {
+ struct xfs_mount *mp;
+ void __user *ubuffer; /* user output buffer */
+ xfs_ino_t startino; /* start with this inode */
+ unsigned int icount; /* number of elements in ubuffer */
+ unsigned int ocount; /* number of records returned */
+ unsigned int flags; /* see XFS_IBULK_FLAG_* */
+};
+
+/* Only iterate within the same AG as startino */
+#define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG)
+
+/* Return value that means we want to abort the walk. */
+#define XFS_IBULK_ABORT (XFS_IWALK_ABORT)
/*
- * Values for stat return value.
+ * Advance the user buffer pointer by one record of the given size. If the
+ * buffer is now full, return the appropriate error code.
*/
-#define BULKSTAT_RV_NOTHING 0
-#define BULKSTAT_RV_DIDONE 1
-#define BULKSTAT_RV_GIVEUP 2
+static inline int
+xfs_ibulk_advance(
+ struct xfs_ibulk *breq,
+ size_t bytes)
+{
+ char __user *b = breq->ubuffer;
+
+ breq->ubuffer = b + bytes;
+ breq->ocount++;
+ return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0;
+}
/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
-int /* error status */
-xfs_bulkstat(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastino, /* last inode returned */
- int *count, /* size of buffer/count returned */
- bulkstat_one_pf formatter, /* func that'd fill a single buf */
- size_t statstruct_size,/* sizeof struct that we're filling */
- char __user *ubuffer,/* buffer with inode stats */
- int *done); /* 1 if there are more stats to get */
-
-typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
- void __user *ubuffer, /* buffer to write to */
- int ubsize, /* remaining user buffer sz */
- int *ubused, /* bytes used by formatter */
- const xfs_bstat_t *buffer); /* buffer to read from */
-
-int
-xfs_bulkstat_one_int(
- xfs_mount_t *mp,
- xfs_ino_t ino,
- void __user *buffer,
- int ubsize,
- bulkstat_one_fmt_pf formatter,
- int *ubused,
- int *stat);
-int
-xfs_bulkstat_one(
- xfs_mount_t *mp,
- xfs_ino_t ino,
- void __user *buffer,
- int ubsize,
- int *ubused,
- int *stat);
+typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat);
-typedef int (*inumbers_fmt_pf)(
- void __user *ubuffer, /* buffer to write to */
- const xfs_inogrp_t *buffer, /* buffer to read from */
- long count, /* # of elements to read */
- long *written); /* # of bytes written */
+int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
+int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
+void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1,
+ const struct xfs_bulkstat *bstat);
-int
-xfs_inumbers_fmt(
- void __user *ubuffer, /* buffer to write to */
- const xfs_inogrp_t *buffer, /* buffer to read from */
- long count, /* # of elements to read */
- long *written); /* # of bytes written */
+typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq,
+ const struct xfs_inumbers *igrp);
-int /* error status */
-xfs_inumbers(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *last, /* last inode returned */
- int *count, /* size of buffer/count returned */
- void __user *buffer, /* buffer with inode info */
- inumbers_fmt_pf formatter);
+int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter);
+void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1,
+ const struct xfs_inumbers *ig);
#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
new file mode 100644
index 000000000000..8c7d727149ea
--- /dev/null
+++ b/fs/xfs/xfs_iwalk.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_health.h"
+#include "xfs_trans.h"
+#include "xfs_pwork.h"
+
+/*
+ * Walking Inodes in the Filesystem
+ * ================================
+ *
+ * This iterator function walks a subset of filesystem inodes in increasing
+ * order from @startino until there are no more inodes. For each allocated
+ * inode it finds, it calls a walk function with the relevant inode number and
+ * a pointer to caller-provided data. The walk function can return the usual
+ * negative error code to stop the iteration; 0 to continue the iteration; or
+ * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the
+ * caller.
+ *
+ * Internally, we allow the walk function to do anything, which means that we
+ * cannot maintain the inobt cursor or our lock on the AGI buffer. We
+ * therefore cache the inobt records in kernel memory and only call the walk
+ * function when our memory buffer is full. @nr_recs is the number of records
+ * that we've cached, and @sz_recs is the size of our cache.
+ *
+ * It is the responsibility of the walk function to ensure it accesses
+ * allocated inodes, as the inobt records may be stale by the time they are
+ * acted upon.
+ */
+
+struct xfs_iwalk_ag {
+ /* parallel work control data; will be null if single threaded */
+ struct xfs_pwork pwork;
+
+ struct xfs_mount *mp;
+ struct xfs_trans *tp;
+
+ /* Where do we start the traversal? */
+ xfs_ino_t startino;
+
+ /* Array of inobt records we cache. */
+ struct xfs_inobt_rec_incore *recs;
+
+ /* Number of entries allocated for the @recs array. */
+ unsigned int sz_recs;
+
+ /* Number of entries in the @recs array that are in use. */
+ unsigned int nr_recs;
+
+ /* Inode walk function and data pointer. */
+ xfs_iwalk_fn iwalk_fn;
+ xfs_inobt_walk_fn inobt_walk_fn;
+ void *data;
+
+ /*
+ * Make it look like the inodes up to startino are free so that
+ * bulkstat can start its inode iteration at the correct place without
+ * needing to special case everywhere.
+ */
+ unsigned int trim_start:1;
+
+ /* Skip empty inobt records? */
+ unsigned int skip_empty:1;
+};
+
+/*
+ * Loop over all clusters in a chunk for a given incore inode allocation btree
+ * record. Do a readahead if there are any allocated inodes in that cluster.
+ */
+STATIC void
+xfs_iwalk_ichunk_ra(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irec)
+{
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_agblock_t agbno;
+ struct blk_plug plug;
+ int i; /* inode chunk index */
+
+ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
+
+ blk_start_plug(&plug);
+ for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) {
+ xfs_inofree_t imask;
+
+ imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster);
+ if (imask & ~irec->ir_free) {
+ xfs_btree_reada_bufs(mp, agno, agbno,
+ igeo->blocks_per_cluster,
+ &xfs_inode_buf_ops);
+ }
+ agbno += igeo->blocks_per_cluster;
+ }
+ blk_finish_plug(&plug);
+}
+
+/*
+ * Set the bits in @irec's free mask that correspond to the inodes before
+ * @agino so that we skip them. This is how we restart an inode walk that was
+ * interrupted in the middle of an inode record.
+ */
+STATIC void
+xfs_iwalk_adjust_start(
+ xfs_agino_t agino, /* starting inode of chunk */
+ struct xfs_inobt_rec_incore *irec) /* btree record */
+{
+ int idx; /* index into inode chunk */
+ int i;
+
+ idx = agino - irec->ir_startino;
+
+ /*
+ * We got a right chunk with some left inodes allocated at it. Grab
+ * the chunk record. Mark all the uninteresting inodes free because
+ * they're before our start point.
+ */
+ for (i = 0; i < idx; i++) {
+ if (XFS_INOBT_MASK(i) & ~irec->ir_free)
+ irec->ir_freecount++;
+ }
+
+ irec->ir_free |= xfs_inobt_maskn(0, idx);
+}
+
+/* Allocate memory for a walk. */
+STATIC int
+xfs_iwalk_alloc(
+ struct xfs_iwalk_ag *iwag)
+{
+ size_t size;
+
+ ASSERT(iwag->recs == NULL);
+ iwag->nr_recs = 0;
+
+ /* Allocate a prefetch buffer for inobt records. */
+ size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore);
+ iwag->recs = kmem_alloc(size, KM_MAYFAIL);
+ if (iwag->recs == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Free memory we allocated for a walk. */
+STATIC void
+xfs_iwalk_free(
+ struct xfs_iwalk_ag *iwag)
+{
+ kmem_free(iwag->recs);
+ iwag->recs = NULL;
+}
+
+/* For each inuse inode in each cached inobt record, call our function. */
+STATIC int
+xfs_iwalk_ag_recs(
+ struct xfs_iwalk_ag *iwag)
+{
+ struct xfs_mount *mp = iwag->mp;
+ struct xfs_trans *tp = iwag->tp;
+ xfs_ino_t ino;
+ unsigned int i, j;
+ xfs_agnumber_t agno;
+ int error;
+
+ agno = XFS_INO_TO_AGNO(mp, iwag->startino);
+ for (i = 0; i < iwag->nr_recs; i++) {
+ struct xfs_inobt_rec_incore *irec = &iwag->recs[i];
+
+ trace_xfs_iwalk_ag_rec(mp, agno, irec);
+
+ if (xfs_pwork_want_abort(&iwag->pwork))
+ return 0;
+
+ if (iwag->inobt_walk_fn) {
+ error = iwag->inobt_walk_fn(mp, tp, agno, irec,
+ iwag->data);
+ if (error)
+ return error;
+ }
+
+ if (!iwag->iwalk_fn)
+ continue;
+
+ for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+ if (xfs_pwork_want_abort(&iwag->pwork))
+ return 0;
+
+ /* Skip if this inode is free */
+ if (XFS_INOBT_MASK(j) & irec->ir_free)
+ continue;
+
+ /* Otherwise call our function. */
+ ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j);
+ error = iwag->iwalk_fn(mp, tp, ino, iwag->data);
+ if (error)
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+/* Delete cursor and let go of AGI. */
+static inline void
+xfs_iwalk_del_inobt(
+ struct xfs_trans *tp,
+ struct xfs_btree_cur **curpp,
+ struct xfs_buf **agi_bpp,
+ int error)
+{
+ if (*curpp) {
+ xfs_btree_del_cursor(*curpp, error);
+ *curpp = NULL;
+ }
+ if (*agi_bpp) {
+ xfs_trans_brelse(tp, *agi_bpp);
+ *agi_bpp = NULL;
+ }
+}
+
+/*
+ * Set ourselves up for walking inobt records starting from a given point in
+ * the filesystem.
+ *
+ * If caller passed in a nonzero start inode number, load the record from the
+ * inobt and make the record look like all the inodes before agino are free so
+ * that we skip them, and then move the cursor to the next inobt record. This
+ * is how we support starting an iwalk in the middle of an inode chunk.
+ *
+ * If the caller passed in a start number of zero, move the cursor to the first
+ * inobt record.
+ *
+ * The caller is responsible for cleaning up the cursor and buffer pointer
+ * regardless of the error status.
+ */
+STATIC int
+xfs_iwalk_ag_start(
+ struct xfs_iwalk_ag *iwag,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino,
+ struct xfs_btree_cur **curpp,
+ struct xfs_buf **agi_bpp,
+ int *has_more)
+{
+ struct xfs_mount *mp = iwag->mp;
+ struct xfs_trans *tp = iwag->tp;
+ struct xfs_inobt_rec_incore *irec;
+ int error;
+
+ /* Set up a fresh cursor and empty the inobt cache. */
+ iwag->nr_recs = 0;
+ error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
+ if (error)
+ return error;
+
+ /* Starting at the beginning of the AG? That's easy! */
+ if (agino == 0)
+ return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more);
+
+ /*
+ * Otherwise, we have to grab the inobt record where we left off, stuff
+ * the record into our cache, and then see if there are more records.
+ * We require a lookup cache of at least two elements so that the
+ * caller doesn't have to deal with tearing down the cursor to walk the
+ * records.
+ */
+ error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more);
+ if (error)
+ return error;
+
+ /*
+ * If the LE lookup at @agino yields no records, jump ahead to the
+ * inobt cursor increment to see if there are more records to process.
+ */
+ if (!*has_more)
+ goto out_advance;
+
+ /* Get the record, should always work */
+ irec = &iwag->recs[iwag->nr_recs];
+ error = xfs_inobt_get_rec(*curpp, irec, has_more);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1);
+
+ /*
+ * If the LE lookup yielded an inobt record before the cursor position,
+ * skip it and see if there's another one after it.
+ */
+ if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
+ goto out_advance;
+
+ /*
+ * If agino fell in the middle of the inode record, make it look like
+ * the inodes up to agino are free so that we don't return them again.
+ */
+ if (iwag->trim_start)
+ xfs_iwalk_adjust_start(agino, irec);
+
+ /*
+ * The prefetch calculation is supposed to give us a large enough inobt
+ * record cache that grab_ichunk can stage a partial first record and
+ * the loop body can cache a record without having to check for cache
+ * space until after it reads an inobt record.
+ */
+ iwag->nr_recs++;
+ ASSERT(iwag->nr_recs < iwag->sz_recs);
+
+out_advance:
+ return xfs_btree_increment(*curpp, 0, has_more);
+}
+
+/*
+ * The inobt record cache is full, so preserve the inobt cursor state and
+ * run callbacks on the cached inobt records. When we're done, restore the
+ * cursor state to wherever the cursor would have been had the cache not been
+ * full (and therefore we could've just incremented the cursor) if *@has_more
+ * is true. On exit, *@has_more will indicate whether or not the caller should
+ * try for more inode records.
+ */
+STATIC int
+xfs_iwalk_run_callbacks(
+ struct xfs_iwalk_ag *iwag,
+ xfs_agnumber_t agno,
+ struct xfs_btree_cur **curpp,
+ struct xfs_buf **agi_bpp,
+ int *has_more)
+{
+ struct xfs_mount *mp = iwag->mp;
+ struct xfs_trans *tp = iwag->tp;
+ struct xfs_inobt_rec_incore *irec;
+ xfs_agino_t restart;
+ int error;
+
+ ASSERT(iwag->nr_recs > 0);
+
+ /* Delete cursor but remember the last record we cached... */
+ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
+ irec = &iwag->recs[iwag->nr_recs - 1];
+ restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+
+ error = xfs_iwalk_ag_recs(iwag);
+ if (error)
+ return error;
+
+ /* ...empty the cache... */
+ iwag->nr_recs = 0;
+
+ if (!has_more)
+ return 0;
+
+ /* ...and recreate the cursor just past where we left off. */
+ error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
+ if (error)
+ return error;
+
+ return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+}
+
+/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
+STATIC int
+xfs_iwalk_ag(
+ struct xfs_iwalk_ag *iwag)
+{
+ struct xfs_mount *mp = iwag->mp;
+ struct xfs_trans *tp = iwag->tp;
+ struct xfs_buf *agi_bp = NULL;
+ struct xfs_btree_cur *cur = NULL;
+ xfs_agnumber_t agno;
+ xfs_agino_t agino;
+ int has_more;
+ int error = 0;
+
+ /* Set up our cursor at the right place in the inode btree. */
+ agno = XFS_INO_TO_AGNO(mp, iwag->startino);
+ agino = XFS_INO_TO_AGINO(mp, iwag->startino);
+ error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more);
+
+ while (!error && has_more) {
+ struct xfs_inobt_rec_incore *irec;
+
+ cond_resched();
+ if (xfs_pwork_want_abort(&iwag->pwork))
+ goto out;
+
+ /* Fetch the inobt record. */
+ irec = &iwag->recs[iwag->nr_recs];
+ error = xfs_inobt_get_rec(cur, irec, &has_more);
+ if (error || !has_more)
+ break;
+
+ /* No allocated inodes in this chunk; skip it. */
+ if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
+ error = xfs_btree_increment(cur, 0, &has_more);
+ if (error)
+ break;
+ continue;
+ }
+
+ /*
+ * Start readahead for this inode chunk in anticipation of
+ * walking the inodes.
+ */
+ if (iwag->iwalk_fn)
+ xfs_iwalk_ichunk_ra(mp, agno, irec);
+
+ /*
+ * If there's space in the buffer for more records, increment
+ * the btree cursor and grab more.
+ */
+ if (++iwag->nr_recs < iwag->sz_recs) {
+ error = xfs_btree_increment(cur, 0, &has_more);
+ if (error || !has_more)
+ break;
+ continue;
+ }
+
+ /*
+ * Otherwise, we need to save cursor state and run the callback
+ * function on the cached records. The run_callbacks function
+ * is supposed to return a cursor pointing to the record where
+ * we would be if we had been able to increment like above.
+ */
+ ASSERT(has_more);
+ error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp,
+ &has_more);
+ }
+
+ if (iwag->nr_recs == 0 || error)
+ goto out;
+
+ /* Walk the unprocessed records in the cache. */
+ error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more);
+
+out:
+ xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error);
+ return error;
+}
+
+/*
+ * We experimentally determined that the reduction in ioctl call overhead
+ * diminishes when userspace asks for more than 2048 inodes, so we'll cap
+ * prefetch at this point.
+ */
+#define IWALK_MAX_INODE_PREFETCH (2048U)
+
+/*
+ * Given the number of inodes to prefetch, set the number of inobt records that
+ * we cache in memory, which controls the number of inodes we try to read
+ * ahead. Set the maximum if @inodes == 0.
+ */
+static inline unsigned int
+xfs_iwalk_prefetch(
+ unsigned int inodes)
+{
+ unsigned int inobt_records;
+
+ /*
+ * If the caller didn't tell us the number of inodes they wanted,
+ * assume the maximum prefetch possible for best performance.
+ * Otherwise, cap prefetch at that maximum so that we don't start an
+ * absurd amount of prefetch.
+ */
+ if (inodes == 0)
+ inodes = IWALK_MAX_INODE_PREFETCH;
+ inodes = min(inodes, IWALK_MAX_INODE_PREFETCH);
+
+ /* Round the inode count up to a full chunk. */
+ inodes = round_up(inodes, XFS_INODES_PER_CHUNK);
+
+ /*
+ * In order to convert the number of inodes to prefetch into an
+ * estimate of the number of inobt records to cache, we require a
+ * conversion factor that reflects our expectations of the average
+ * loading factor of an inode chunk. Based on data gathered, most
+ * (but not all) filesystems manage to keep the inode chunks totally
+ * full, so we'll underestimate slightly so that our readahead will
+ * still deliver the performance we want on aging filesystems:
+ *
+ * inobt = inodes / (INODES_PER_CHUNK * (4 / 5));
+ *
+ * The funny math is to avoid integer division.
+ */
+ inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK);
+
+ /*
+ * Allocate enough space to prefetch at least two inobt records so that
+ * we can cache both the record where the iwalk started and the next
+ * record. This simplifies the AG inode walk loop setup code.
+ */
+ return max(inobt_records, 2U);
+}
+
+/*
+ * Walk all inodes in the filesystem starting from @startino. The @iwalk_fn
+ * will be called for each allocated inode, being passed the inode's number and
+ * @data. @max_prefetch controls how many inobt records' worth of inodes we
+ * try to readahead.
+ */
+int
+xfs_iwalk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t startino,
+ unsigned int flags,
+ xfs_iwalk_fn iwalk_fn,
+ unsigned int inode_records,
+ void *data)
+{
+ struct xfs_iwalk_ag iwag = {
+ .mp = mp,
+ .tp = tp,
+ .iwalk_fn = iwalk_fn,
+ .data = data,
+ .startino = startino,
+ .sz_recs = xfs_iwalk_prefetch(inode_records),
+ .trim_start = 1,
+ .skip_empty = 1,
+ .pwork = XFS_PWORK_SINGLE_THREADED,
+ };
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ int error;
+
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+ error = xfs_iwalk_alloc(&iwag);
+ if (error)
+ return error;
+
+ for (; agno < mp->m_sb.sb_agcount; agno++) {
+ error = xfs_iwalk_ag(&iwag);
+ if (error)
+ break;
+ iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
+ if (flags & XFS_INOBT_WALK_SAME_AG)
+ break;
+ }
+
+ xfs_iwalk_free(&iwag);
+ return error;
+}
+
+/* Run per-thread iwalk work. */
+static int
+xfs_iwalk_ag_work(
+ struct xfs_mount *mp,
+ struct xfs_pwork *pwork)
+{
+ struct xfs_iwalk_ag *iwag;
+ int error = 0;
+
+ iwag = container_of(pwork, struct xfs_iwalk_ag, pwork);
+ if (xfs_pwork_want_abort(pwork))
+ goto out;
+
+ error = xfs_iwalk_alloc(iwag);
+ if (error)
+ goto out;
+
+ error = xfs_iwalk_ag(iwag);
+ xfs_iwalk_free(iwag);
+out:
+ kmem_free(iwag);
+ return error;
+}
+
+/*
+ * Walk all the inodes in the filesystem using multiple threads to process each
+ * AG.
+ */
+int
+xfs_iwalk_threaded(
+ struct xfs_mount *mp,
+ xfs_ino_t startino,
+ unsigned int flags,
+ xfs_iwalk_fn iwalk_fn,
+ unsigned int inode_records,
+ bool polled,
+ void *data)
+{
+ struct xfs_pwork_ctl pctl;
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ unsigned int nr_threads;
+ int error;
+
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+ nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
+ error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk",
+ nr_threads);
+ if (error)
+ return error;
+
+ for (; agno < mp->m_sb.sb_agcount; agno++) {
+ struct xfs_iwalk_ag *iwag;
+
+ if (xfs_pwork_ctl_want_abort(&pctl))
+ break;
+
+ iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP);
+ iwag->mp = mp;
+ iwag->iwalk_fn = iwalk_fn;
+ iwag->data = data;
+ iwag->startino = startino;
+ iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
+ xfs_pwork_queue(&pctl, &iwag->pwork);
+ startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
+ if (flags & XFS_INOBT_WALK_SAME_AG)
+ break;
+ }
+
+ if (polled)
+ xfs_pwork_poll(&pctl);
+ return xfs_pwork_destroy(&pctl);
+}
+
+/*
+ * Allow callers to cache up to a page's worth of inobt records. This reflects
+ * the existing inumbers prefetching behavior. Since the inobt walk does not
+ * itself do anything with the inobt records, we can set a fairly high limit
+ * here.
+ */
+#define MAX_INOBT_WALK_PREFETCH \
+ (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore))
+
+/*
+ * Given the number of records that the user wanted, set the number of inobt
+ * records that we buffer in memory. Set the maximum if @inobt_records == 0.
+ */
+static inline unsigned int
+xfs_inobt_walk_prefetch(
+ unsigned int inobt_records)
+{
+ /*
+ * If the caller didn't tell us the number of inobt records they
+ * wanted, assume the maximum prefetch possible for best performance.
+ */
+ if (inobt_records == 0)
+ inobt_records = MAX_INOBT_WALK_PREFETCH;
+
+ /*
+ * Allocate enough space to prefetch at least two inobt records so that
+ * we can cache both the record where the iwalk started and the next
+ * record. This simplifies the AG inode walk loop setup code.
+ */
+ inobt_records = max(inobt_records, 2U);
+
+ /*
+ * Cap prefetch at that maximum so that we don't use an absurd amount
+ * of memory.
+ */
+ return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH);
+}
+
+/*
+ * Walk all inode btree records in the filesystem starting from @startino. The
+ * @inobt_walk_fn will be called for each btree record, being passed the incore
+ * record and @data. @max_prefetch controls how many inobt records we try to
+ * cache ahead of time.
+ */
+int
+xfs_inobt_walk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t startino,
+ unsigned int flags,
+ xfs_inobt_walk_fn inobt_walk_fn,
+ unsigned int inobt_records,
+ void *data)
+{
+ struct xfs_iwalk_ag iwag = {
+ .mp = mp,
+ .tp = tp,
+ .inobt_walk_fn = inobt_walk_fn,
+ .data = data,
+ .startino = startino,
+ .sz_recs = xfs_inobt_walk_prefetch(inobt_records),
+ .pwork = XFS_PWORK_SINGLE_THREADED,
+ };
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ int error;
+
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL));
+
+ error = xfs_iwalk_alloc(&iwag);
+ if (error)
+ return error;
+
+ for (; agno < mp->m_sb.sb_agcount; agno++) {
+ error = xfs_iwalk_ag(&iwag);
+ if (error)
+ break;
+ iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
+ if (flags & XFS_INOBT_WALK_SAME_AG)
+ break;
+ }
+
+ xfs_iwalk_free(&iwag);
+ return error;
+}
diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h
new file mode 100644
index 000000000000..6c960e10ed4d
--- /dev/null
+++ b/fs/xfs/xfs_iwalk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_IWALK_H__
+#define __XFS_IWALK_H__
+
+/* Walk all inodes in the filesystem starting from @startino. */
+typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_ino_t ino, void *data);
+/* Return values for xfs_iwalk_fn. */
+#define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE)
+#define XFS_IWALK_ABORT (XFS_ITER_ABORT)
+
+int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino,
+ unsigned int flags, xfs_iwalk_fn iwalk_fn,
+ unsigned int inode_records, void *data);
+int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino,
+ unsigned int flags, xfs_iwalk_fn iwalk_fn,
+ unsigned int inode_records, bool poll, void *data);
+
+/* Only iterate inodes within the same AG as @startino. */
+#define XFS_IWALK_SAME_AG (0x1)
+
+#define XFS_IWALK_FLAGS_ALL (XFS_IWALK_SAME_AG)
+
+/* Walk all inode btree records in the filesystem starting from @startino. */
+typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ const struct xfs_inobt_rec_incore *irec,
+ void *data);
+/* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */
+#define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT)
+
+int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_ino_t startino, unsigned int flags,
+ xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records,
+ void *data);
+
+/* Only iterate inobt records within the same AG as @startino. */
+#define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG)
+
+#define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG)
+
+#endif /* __XFS_IWALK_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index edbd5a210df2..ca15105681ca 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -110,8 +110,6 @@ typedef __u32 xfs_nlink_t;
#define current_restore_flags_nested(sp, f) \
(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
-#define spinlock_destroy(lock)
-
#define NBBY 8 /* number of bits per byte */
/*
@@ -221,6 +219,9 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
return x;
}
+int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
+ char *data, unsigned int op);
+
#define ASSERT_ALWAYS(expr) \
(likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2466b0f5b6c4..00e9f5c388d3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -16,11 +16,7 @@
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode.h"
#include "xfs_trace.h"
-#include "xfs_fsops.h"
-#include "xfs_cksum.h"
#include "xfs_sysfs.h"
#include "xfs_sb.h"
#include "xfs_health.h"
@@ -45,21 +41,14 @@ STATIC int
xlog_space_left(
struct xlog *log,
atomic64_t *head);
-STATIC int
-xlog_sync(
- struct xlog *log,
- struct xlog_in_core *iclog);
STATIC void
xlog_dealloc_log(
struct xlog *log);
/* local state machine functions */
-STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
-STATIC void
-xlog_state_do_callback(
- struct xlog *log,
- int aborted,
- struct xlog_in_core *iclog);
+STATIC void xlog_state_done_syncing(
+ struct xlog_in_core *iclog,
+ bool aborted);
STATIC int
xlog_state_get_iclog_space(
struct xlog *log,
@@ -107,8 +96,7 @@ STATIC void
xlog_verify_iclog(
struct xlog *log,
struct xlog_in_core *iclog,
- int count,
- bool syncing);
+ int count);
STATIC void
xlog_verify_tail_lsn(
struct xlog *log,
@@ -117,7 +105,7 @@ xlog_verify_tail_lsn(
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
-#define xlog_verify_iclog(a,b,c,d)
+#define xlog_verify_iclog(a,b,c)
#define xlog_verify_tail_lsn(a,b,c)
#endif
@@ -541,32 +529,6 @@ xfs_log_done(
return lsn;
}
-/*
- * Attaches a new iclog I/O completion callback routine during
- * transaction commit. If the log is in error state, a non-zero
- * return code is handed back and the caller is responsible for
- * executing the callback at an appropriate time.
- */
-int
-xfs_log_notify(
- struct xlog_in_core *iclog,
- xfs_log_callback_t *cb)
-{
- int abortflg;
-
- spin_lock(&iclog->ic_callback_lock);
- abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
- if (!abortflg) {
- ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
- (iclog->ic_state == XLOG_STATE_WANT_SYNC));
- cb->cb_next = NULL;
- *(iclog->ic_callback_tail) = cb;
- iclog->ic_callback_tail = &(cb->cb_next);
- }
- spin_unlock(&iclog->ic_callback_lock);
- return abortflg;
-}
-
int
xfs_log_release_iclog(
struct xfs_mount *mp,
@@ -807,16 +769,12 @@ xfs_log_mount_finish(
* The mount has failed. Cancel the recovery if it hasn't completed and destroy
* the log.
*/
-int
+void
xfs_log_mount_cancel(
struct xfs_mount *mp)
{
- int error;
-
- error = xlog_recover_cancel(mp->m_log);
+ xlog_recover_cancel(mp->m_log);
xfs_log_unmount(mp);
-
- return error;
}
/*
@@ -932,7 +890,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
* Or, if we are doing a forced umount (typically because of IO errors).
*/
if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
- xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+ xfs_readonly_buftarg(log->l_targ)) {
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
return 0;
}
@@ -1244,53 +1202,49 @@ xlog_space_left(
}
-/*
- * Log function which is called when an io completes.
- *
- * The log manager needs its own routine, in order to control what
- * happens with the buffer after the write completes.
- */
static void
-xlog_iodone(xfs_buf_t *bp)
+xlog_ioend_work(
+ struct work_struct *work)
{
- struct xlog_in_core *iclog = bp->b_log_item;
- struct xlog *l = iclog->ic_log;
- int aborted = 0;
+ struct xlog_in_core *iclog =
+ container_of(work, struct xlog_in_core, ic_end_io_work);
+ struct xlog *log = iclog->ic_log;
+ bool aborted = false;
+ int error;
+
+ error = blk_status_to_errno(iclog->ic_bio.bi_status);
+#ifdef DEBUG
+ /* treat writes with injected CRC errors as failed */
+ if (iclog->ic_fail_crc)
+ error = -EIO;
+#endif
/*
- * Race to shutdown the filesystem if we see an error or the iclog is in
- * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
- * CRC errors into log recovery.
+ * Race to shutdown the filesystem if we see an error.
*/
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
- iclog->ic_state & XLOG_STATE_IOABORT) {
- if (iclog->ic_state & XLOG_STATE_IOABORT)
- iclog->ic_state &= ~XLOG_STATE_IOABORT;
-
- xfs_buf_ioerror_alert(bp, __func__);
- xfs_buf_stale(bp);
- xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
+ if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
+ xfs_alert(log->l_mp, "log I/O error %d", error);
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
/*
* This flag will be propagated to the trans-committed
* callback routines to let them know that the log-commit
* didn't succeed.
*/
- aborted = XFS_LI_ABORTED;
+ aborted = true;
} else if (iclog->ic_state & XLOG_STATE_IOERROR) {
- aborted = XFS_LI_ABORTED;
+ aborted = true;
}
- /* log I/O is always issued ASYNC */
- ASSERT(bp->b_flags & XBF_ASYNC);
xlog_state_done_syncing(iclog, aborted);
+ bio_uninit(&iclog->ic_bio);
/*
- * drop the buffer lock now that we are done. Nothing references
- * the buffer after this, so an unmount waiting on this lock can now
- * tear it down safely. As such, it is unsafe to reference the buffer
- * (bp) after the unlock as we could race with it being freed.
+ * Drop the lock to signal that we are done. Nothing references the
+ * iclog after this, so an unmount waiting on this lock can now tear it
+ * down safely. As such, it is unsafe to reference the iclog after the
+ * unlock as we could race with it being freed.
*/
- xfs_buf_unlock(bp);
+ up(&iclog->ic_sema);
}
/*
@@ -1301,65 +1255,26 @@ xlog_iodone(xfs_buf_t *bp)
* If the filesystem blocksize is too large, we may need to choose a
* larger size since the directory code currently logs entire blocks.
*/
-
STATIC void
xlog_get_iclog_buffer_size(
struct xfs_mount *mp,
struct xlog *log)
{
- int size;
- int xhdrs;
-
if (mp->m_logbufs <= 0)
- log->l_iclog_bufs = XLOG_MAX_ICLOGS;
- else
- log->l_iclog_bufs = mp->m_logbufs;
+ mp->m_logbufs = XLOG_MAX_ICLOGS;
+ if (mp->m_logbsize <= 0)
+ mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
+
+ log->l_iclog_bufs = mp->m_logbufs;
+ log->l_iclog_size = mp->m_logbsize;
/*
- * Buffer size passed in from mount system call.
+ * # headers = size / 32k - one header holds cycles from 32k of data.
*/
- if (mp->m_logbsize > 0) {
- size = log->l_iclog_size = mp->m_logbsize;
- log->l_iclog_size_log = 0;
- while (size != 1) {
- log->l_iclog_size_log++;
- size >>= 1;
- }
-
- if (xfs_sb_version_haslogv2(&mp->m_sb)) {
- /* # headers = size / 32k
- * one header holds cycles from 32k of data
- */
-
- xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
- if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
- xhdrs++;
- log->l_iclog_hsize = xhdrs << BBSHIFT;
- log->l_iclog_heads = xhdrs;
- } else {
- ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
- log->l_iclog_hsize = BBSIZE;
- log->l_iclog_heads = 1;
- }
- goto done;
- }
-
- /* All machines use 32kB buffers by default. */
- log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
- log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
-
- /* the default log size is 16k or 32k which is one header sector */
- log->l_iclog_hsize = BBSIZE;
- log->l_iclog_heads = 1;
-
-done:
- /* are we being asked to make the sizes selected above visible? */
- if (mp->m_logbufs == 0)
- mp->m_logbufs = log->l_iclog_bufs;
- if (mp->m_logbsize == 0)
- mp->m_logbsize = log->l_iclog_size;
-} /* xlog_get_iclog_buffer_size */
-
+ log->l_iclog_heads =
+ DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
+ log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
+}
void
xfs_log_work_queue(
@@ -1422,7 +1337,6 @@ xlog_alloc_log(
xlog_rec_header_t *head;
xlog_in_core_t **iclogp;
xlog_in_core_t *iclog, *prev_iclog=NULL;
- xfs_buf_t *bp;
int i;
int error = -ENOMEM;
uint log2_size = 0;
@@ -1480,30 +1394,6 @@ xlog_alloc_log(
xlog_get_iclog_buffer_size(mp, log);
- /*
- * Use a NULL block for the extra log buffer used during splits so that
- * it will trigger errors if we ever try to do IO on it without first
- * having set it up properly.
- */
- error = -ENOMEM;
- bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
- BTOBB(log->l_iclog_size), XBF_NO_IOACCT);
- if (!bp)
- goto out_free_log;
-
- /*
- * The iclogbuf buffer locks are held over IO but we are not going to do
- * IO yet. Hence unlock the buffer so that the log IO path can grab it
- * when appropriately.
- */
- ASSERT(xfs_buf_islocked(bp));
- xfs_buf_unlock(bp);
-
- /* use high priority wq for log I/O completion */
- bp->b_ioend_wq = mp->m_log_workqueue;
- bp->b_iodone = xlog_iodone;
- log->l_xbuf = bp;
-
spin_lock_init(&log->l_icloglock);
init_waitqueue_head(&log->l_flush_wait);
@@ -1516,29 +1406,22 @@ xlog_alloc_log(
* xlog_in_core_t in xfs_log_priv.h for details.
*/
ASSERT(log->l_iclog_size >= 4096);
- for (i=0; i < log->l_iclog_bufs; i++) {
- *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
- if (!*iclogp)
+ for (i = 0; i < log->l_iclog_bufs; i++) {
+ size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
+ sizeof(struct bio_vec);
+
+ iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
+ if (!iclog)
goto out_free_iclog;
- iclog = *iclogp;
+ *iclogp = iclog;
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
- bp = xfs_buf_get_uncached(mp->m_logdev_targp,
- BTOBB(log->l_iclog_size),
- XBF_NO_IOACCT);
- if (!bp)
+ iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
+ KM_MAYFAIL);
+ if (!iclog->ic_data)
goto out_free_iclog;
-
- ASSERT(xfs_buf_islocked(bp));
- xfs_buf_unlock(bp);
-
- /* use high priority wq for log I/O completion */
- bp->b_ioend_wq = mp->m_log_workqueue;
- bp->b_iodone = xlog_iodone;
- iclog->ic_bp = bp;
- iclog->ic_data = bp->b_addr;
#ifdef DEBUG
log->l_iclog_bak[i] = &iclog->ic_header;
#endif
@@ -1552,36 +1435,43 @@ xlog_alloc_log(
head->h_fmt = cpu_to_be32(XLOG_FMT);
memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
- iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;
+ iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
iclog->ic_state = XLOG_STATE_ACTIVE;
iclog->ic_log = log;
atomic_set(&iclog->ic_refcnt, 0);
spin_lock_init(&iclog->ic_callback_lock);
- iclog->ic_callback_tail = &(iclog->ic_callback);
+ INIT_LIST_HEAD(&iclog->ic_callbacks);
iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
init_waitqueue_head(&iclog->ic_force_wait);
init_waitqueue_head(&iclog->ic_write_wait);
+ INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
+ sema_init(&iclog->ic_sema, 1);
iclogp = &iclog->ic_next;
}
*iclogp = log->l_iclog; /* complete ring */
log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
+ log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
+ WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
+ mp->m_fsname);
+ if (!log->l_ioend_workqueue)
+ goto out_free_iclog;
+
error = xlog_cil_init(log);
if (error)
- goto out_free_iclog;
+ goto out_destroy_workqueue;
return log;
+out_destroy_workqueue:
+ destroy_workqueue(log->l_ioend_workqueue);
out_free_iclog:
for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
prev_iclog = iclog->ic_next;
- if (iclog->ic_bp)
- xfs_buf_free(iclog->ic_bp);
+ kmem_free(iclog->ic_data);
kmem_free(iclog);
}
- spinlock_destroy(&log->l_icloglock);
- xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
out:
@@ -1766,42 +1656,155 @@ xlog_cksum(
return xfs_end_cksum(crc);
}
-/*
- * The bdstrat callback function for log bufs. This gives us a central
- * place to trap bufs in case we get hit by a log I/O error and need to
- * shutdown. Actually, in practice, even when we didn't get a log error,
- * we transition the iclogs to IOERROR state *after* flushing all existing
- * iclogs to disk. This is because we don't want anymore new transactions to be
- * started or completed afterwards.
- *
- * We lock the iclogbufs here so that we can serialise against IO completion
- * during unmount. We might be processing a shutdown triggered during unmount,
- * and that can occur asynchronously to the unmount thread, and hence we need to
- * ensure that completes before tearing down the iclogbufs. Hence we need to
- * hold the buffer lock across the log IO to acheive that.
- */
-STATIC int
-xlog_bdstrat(
- struct xfs_buf *bp)
+static void
+xlog_bio_end_io(
+ struct bio *bio)
{
- struct xlog_in_core *iclog = bp->b_log_item;
+ struct xlog_in_core *iclog = bio->bi_private;
- xfs_buf_lock(bp);
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- xfs_buf_ioerror(bp, -EIO);
- xfs_buf_stale(bp);
- xfs_buf_ioend(bp);
+ queue_work(iclog->ic_log->l_ioend_workqueue,
+ &iclog->ic_end_io_work);
+}
+
+static void
+xlog_map_iclog_data(
+ struct bio *bio,
+ void *data,
+ size_t count)
+{
+ do {
+ struct page *page = kmem_to_page(data);
+ unsigned int off = offset_in_page(data);
+ size_t len = min_t(size_t, count, PAGE_SIZE - off);
+
+ WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len);
+
+ data += len;
+ count -= len;
+ } while (count);
+}
+
+STATIC void
+xlog_write_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ uint64_t bno,
+ unsigned int count,
+ bool need_flush)
+{
+ ASSERT(bno < log->l_logBBsize);
+
+ /*
+ * We lock the iclogbufs here so that we can serialise against I/O
+ * completion during unmount. We might be processing a shutdown
+ * triggered during unmount, and that can occur asynchronously to the
+ * unmount thread, and hence we need to ensure that completes before
+ * tearing down the iclogbufs. Hence we need to hold the buffer lock
+ * across the log IO to archieve that.
+ */
+ down(&iclog->ic_sema);
+ if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
/*
* It would seem logical to return EIO here, but we rely on
* the log state machine to propagate I/O errors instead of
- * doing it here. Similarly, IO completion will unlock the
- * buffer, so we don't do it here.
+ * doing it here. We kick of the state machine and unlock
+ * the buffer manually, the code needs to be kept in sync
+ * with the I/O completion path.
*/
- return 0;
+ xlog_state_done_syncing(iclog, XFS_LI_ABORTED);
+ up(&iclog->ic_sema);
+ return;
}
- xfs_buf_submit(bp);
- return 0;
+ iclog->ic_io_size = count;
+
+ bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
+ bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
+ iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
+ iclog->ic_bio.bi_end_io = xlog_bio_end_io;
+ iclog->ic_bio.bi_private = iclog;
+ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;
+ if (need_flush)
+ iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+
+ xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
+ if (is_vmalloc_addr(iclog->ic_data))
+ flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
+
+ /*
+ * If this log buffer would straddle the end of the log we will have
+ * to split it up into two bios, so that we can continue at the start.
+ */
+ if (bno + BTOBB(count) > log->l_logBBsize) {
+ struct bio *split;
+
+ split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
+ GFP_NOIO, &fs_bio_set);
+ bio_chain(split, &iclog->ic_bio);
+ submit_bio(split);
+
+ /* restart at logical offset zero for the remainder */
+ iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
+ }
+
+ submit_bio(&iclog->ic_bio);
+}
+
+/*
+ * We need to bump cycle number for the part of the iclog that is
+ * written to the start of the log. Watch out for the header magic
+ * number case, though.
+ */
+static void
+xlog_split_iclog(
+ struct xlog *log,
+ void *data,
+ uint64_t bno,
+ unsigned int count)
+{
+ unsigned int split_offset = BBTOB(log->l_logBBsize - bno);
+ unsigned int i;
+
+ for (i = split_offset; i < count; i += BBSIZE) {
+ uint32_t cycle = get_unaligned_be32(data + i);
+
+ if (++cycle == XLOG_HEADER_MAGIC_NUM)
+ cycle++;
+ put_unaligned_be32(cycle, data + i);
+ }
+}
+
+static int
+xlog_calc_iclog_size(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ uint32_t *roundoff)
+{
+ uint32_t count_init, count;
+ bool use_lsunit;
+
+ use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
+ log->l_mp->m_sb.sb_logsunit > 1;
+
+ /* Add for LR header */
+ count_init = log->l_iclog_hsize + iclog->ic_offset;
+
+ /* Round out the log write size */
+ if (use_lsunit) {
+ /* we have a v2 stripe unit to use */
+ count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
+ } else {
+ count = BBTOB(BTOBB(count_init));
+ }
+
+ ASSERT(count >= count_init);
+ *roundoff = count - count_init;
+
+ if (use_lsunit)
+ ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit);
+ else
+ ASSERT(*roundoff < BBTOB(1));
+ return count;
}
/*
@@ -1824,46 +1827,23 @@ xlog_bdstrat(
* log will require grabbing the lock though.
*
* The entire log manager uses a logical block numbering scheme. Only
- * log_sync (and then only bwrite()) know about the fact that the log may
- * not start with block zero on a given device. The log block start offset
- * is added immediately before calling bwrite().
+ * xlog_write_iclog knows about the fact that the log may not start with
+ * block zero on a given device.
*/
-
-STATIC int
+STATIC void
xlog_sync(
struct xlog *log,
struct xlog_in_core *iclog)
{
- xfs_buf_t *bp;
- int i;
- uint count; /* byte count of bwrite */
- uint count_init; /* initial count before roundup */
- int roundoff; /* roundoff to BB or stripe */
- int split = 0; /* split write into two regions */
- int error;
- int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
- int size;
+ unsigned int count; /* byte count of bwrite */
+ unsigned int roundoff; /* roundoff to BB or stripe */
+ uint64_t bno;
+ unsigned int size;
+ bool need_flush = true, split = false;
- XFS_STATS_INC(log->l_mp, xs_log_writes);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
- /* Add for LR header */
- count_init = log->l_iclog_hsize + iclog->ic_offset;
-
- /* Round out the log write size */
- if (v2 && log->l_mp->m_sb.sb_logsunit > 1) {
- /* we have a v2 stripe unit to use */
- count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
- } else {
- count = BBTOB(BTOBB(count_init));
- }
- roundoff = count - count_init;
- ASSERT(roundoff >= 0);
- ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 &&
- roundoff < log->l_mp->m_sb.sb_logsunit)
- ||
- (log->l_mp->m_sb.sb_logsunit <= 1 &&
- roundoff < BBTOB(1)));
+ count = xlog_calc_iclog_size(log, iclog, &roundoff);
/* move grant heads by roundoff in sync */
xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
@@ -1874,41 +1854,19 @@ xlog_sync(
/* real byte length */
size = iclog->ic_offset;
- if (v2)
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
size += roundoff;
iclog->ic_header.h_len = cpu_to_be32(size);
- bp = iclog->ic_bp;
- XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
-
+ XFS_STATS_INC(log->l_mp, xs_log_writes);
XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
- /* Do we need to split this write into 2 parts? */
- if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
- char *dptr;
-
- split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
- count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
- iclog->ic_bwritecnt = 2;
+ bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));
- /*
- * Bump the cycle numbers at the start of each block in the
- * part of the iclog that ends up in the buffer that gets
- * written to the start of the log.
- *
- * Watch out for the header magic number case, though.
- */
- dptr = (char *)&iclog->ic_header + count;
- for (i = 0; i < split; i += BBSIZE) {
- uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
- if (++cycle == XLOG_HEADER_MAGIC_NUM)
- cycle++;
- *(__be32 *)dptr = cpu_to_be32(cycle);
-
- dptr += BBSIZE;
- }
- } else {
- iclog->ic_bwritecnt = 1;
+ /* Do we need to split this write into 2 parts? */
+ if (bno + BTOBB(count) > log->l_logBBsize) {
+ xlog_split_iclog(log, &iclog->ic_header, bno, count);
+ split = true;
}
/* calculcate the checksum */
@@ -1921,18 +1879,15 @@ xlog_sync(
* write on I/O completion and shutdown the fs. The subsequent mount
* detects the bad CRC and attempts to recover.
*/
+#ifdef DEBUG
if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
- iclog->ic_state |= XLOG_STATE_IOABORT;
+ iclog->ic_fail_crc = true;
xfs_warn(log->l_mp,
"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
be64_to_cpu(iclog->ic_header.h_lsn));
}
-
- bp->b_io_length = BTOBB(count);
- bp->b_log_item = iclog;
- bp->b_flags &= ~XBF_FLUSH;
- bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
+#endif
/*
* Flush the data device before flushing the log to make sure all meta
@@ -1942,50 +1897,14 @@ xlog_sync(
* synchronously here; for an internal log we can simply use the block
* layer state machine for preflushes.
*/
- if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+ if (log->l_targ != log->l_mp->m_ddev_targp || split) {
xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
- else
- bp->b_flags |= XBF_FLUSH;
-
- ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
- ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
-
- xlog_verify_iclog(log, iclog, count, true);
-
- /* account for log which doesn't start at block #0 */
- XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
-
- /*
- * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
- * is shutting down.
- */
- error = xlog_bdstrat(bp);
- if (error) {
- xfs_buf_ioerror_alert(bp, "xlog_sync");
- return error;
+ need_flush = false;
}
- if (split) {
- bp = iclog->ic_log->l_xbuf;
- XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
- xfs_buf_associate_memory(bp,
- (char *)&iclog->ic_header + count, split);
- bp->b_log_item = iclog;
- bp->b_flags &= ~XBF_FLUSH;
- bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
-
- ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
- ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
-
- /* account for internal log which doesn't start at block #0 */
- XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
- error = xlog_bdstrat(bp);
- if (error) {
- xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
- return error;
- }
- }
- return 0;
-} /* xlog_sync */
+
+ xlog_verify_iclog(log, iclog, count);
+ xlog_write_iclog(log, iclog, bno, count, need_flush);
+}
/*
* Deallocate a log structure
@@ -2005,31 +1924,21 @@ xlog_dealloc_log(
*/
iclog = log->l_iclog;
for (i = 0; i < log->l_iclog_bufs; i++) {
- xfs_buf_lock(iclog->ic_bp);
- xfs_buf_unlock(iclog->ic_bp);
+ down(&iclog->ic_sema);
+ up(&iclog->ic_sema);
iclog = iclog->ic_next;
}
- /*
- * Always need to ensure that the extra buffer does not point to memory
- * owned by another log buffer before we free it. Also, cycle the lock
- * first to ensure we've completed IO on it.
- */
- xfs_buf_lock(log->l_xbuf);
- xfs_buf_unlock(log->l_xbuf);
- xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
- xfs_buf_free(log->l_xbuf);
-
iclog = log->l_iclog;
for (i = 0; i < log->l_iclog_bufs; i++) {
- xfs_buf_free(iclog->ic_bp);
next_iclog = iclog->ic_next;
+ kmem_free(iclog->ic_data);
kmem_free(iclog);
iclog = next_iclog;
}
- spinlock_destroy(&log->l_icloglock);
log->l_mp->m_log = NULL;
+ destroy_workqueue(log->l_ioend_workqueue);
kmem_free(log);
} /* xlog_dealloc_log */
@@ -2610,7 +2519,7 @@ xlog_state_clean_log(
if (iclog->ic_state == XLOG_STATE_DIRTY) {
iclog->ic_state = XLOG_STATE_ACTIVE;
iclog->ic_offset = 0;
- ASSERT(iclog->ic_callback == NULL);
+ ASSERT(list_empty_careful(&iclog->ic_callbacks));
/*
* If the number of ops in this iclog indicate it just
* contains the dummy transaction, we can
@@ -2680,37 +2589,32 @@ xlog_state_clean_log(
STATIC xfs_lsn_t
xlog_get_lowest_lsn(
- struct xlog *log)
+ struct xlog *log)
{
- xlog_in_core_t *lsn_log;
- xfs_lsn_t lowest_lsn, lsn;
+ struct xlog_in_core *iclog = log->l_iclog;
+ xfs_lsn_t lowest_lsn = 0, lsn;
- lsn_log = log->l_iclog;
- lowest_lsn = 0;
do {
- if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) {
- lsn = be64_to_cpu(lsn_log->ic_header.h_lsn);
- if ((lsn && !lowest_lsn) ||
- (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) {
+ if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
+ continue;
+
+ lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+ if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
lowest_lsn = lsn;
- }
- }
- lsn_log = lsn_log->ic_next;
- } while (lsn_log != log->l_iclog);
+ } while ((iclog = iclog->ic_next) != log->l_iclog);
+
return lowest_lsn;
}
-
STATIC void
xlog_state_do_callback(
struct xlog *log,
- int aborted,
+ bool aborted,
struct xlog_in_core *ciclog)
{
xlog_in_core_t *iclog;
xlog_in_core_t *first_iclog; /* used to know when we've
* processed all iclogs once */
- xfs_log_callback_t *cb, *cb_next;
int flushcnt = 0;
xfs_lsn_t lowest_lsn;
int ioerrors; /* counter: iclogs with errors */
@@ -2821,7 +2725,7 @@ xlog_state_do_callback(
*/
ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
- if (iclog->ic_callback)
+ if (!list_empty_careful(&iclog->ic_callbacks))
atomic64_set(&log->l_last_sync_lsn,
be64_to_cpu(iclog->ic_header.h_lsn));
@@ -2838,26 +2742,20 @@ xlog_state_do_callback(
* callbacks being added.
*/
spin_lock(&iclog->ic_callback_lock);
- cb = iclog->ic_callback;
- while (cb) {
- iclog->ic_callback_tail = &(iclog->ic_callback);
- iclog->ic_callback = NULL;
- spin_unlock(&iclog->ic_callback_lock);
+ while (!list_empty(&iclog->ic_callbacks)) {
+ LIST_HEAD(tmp);
- /* perform callbacks in the order given */
- for (; cb; cb = cb_next) {
- cb_next = cb->cb_next;
- cb->cb_func(cb->cb_arg, aborted);
- }
+ list_splice_init(&iclog->ic_callbacks, &tmp);
+
+ spin_unlock(&iclog->ic_callback_lock);
+ xlog_cil_process_committed(&tmp, aborted);
spin_lock(&iclog->ic_callback_lock);
- cb = iclog->ic_callback;
}
loopdidcallbacks++;
funcdidcallbacks++;
spin_lock(&log->l_icloglock);
- ASSERT(iclog->ic_callback == NULL);
spin_unlock(&iclog->ic_callback_lock);
if (!(iclog->ic_state & XLOG_STATE_IOERROR))
iclog->ic_state = XLOG_STATE_DIRTY;
@@ -2943,18 +2841,16 @@ xlog_state_do_callback(
*/
STATIC void
xlog_state_done_syncing(
- xlog_in_core_t *iclog,
- int aborted)
+ struct xlog_in_core *iclog,
+ bool aborted)
{
- struct xlog *log = iclog->ic_log;
+ struct xlog *log = iclog->ic_log;
spin_lock(&log->l_icloglock);
ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
iclog->ic_state == XLOG_STATE_IOERROR);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
- ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
-
/*
* If we got an error, either on the first buffer, or in the case of
@@ -2962,13 +2858,8 @@ xlog_state_done_syncing(
* and none should ever be attempted to be written to disk
* again.
*/
- if (iclog->ic_state != XLOG_STATE_IOERROR) {
- if (--iclog->ic_bwritecnt == 1) {
- spin_unlock(&log->l_icloglock);
- return;
- }
+ if (iclog->ic_state != XLOG_STATE_IOERROR)
iclog->ic_state = XLOG_STATE_DONE_SYNC;
- }
/*
* Someone could be sleeping prior to writing out the next
@@ -3237,7 +3128,7 @@ xlog_state_release_iclog(
* flags after this point.
*/
if (sync)
- return xlog_sync(log, iclog);
+ xlog_sync(log, iclog);
return 0;
} /* xlog_state_release_iclog */
@@ -3828,8 +3719,7 @@ STATIC void
xlog_verify_iclog(
struct xlog *log,
struct xlog_in_core *iclog,
- int count,
- bool syncing)
+ int count)
{
xlog_op_header_t *ophead;
xlog_in_core_t *icptr;
@@ -3873,7 +3763,7 @@ xlog_verify_iclog(
/* clientid is only 1 byte */
p = &ophead->oh_clientid;
field_offset = p - base_ptr;
- if (!syncing || (field_offset & 0x1ff)) {
+ if (field_offset & 0x1ff) {
clientid = ophead->oh_clientid;
} else {
idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
@@ -3896,7 +3786,7 @@ xlog_verify_iclog(
/* check length */
p = &ophead->oh_len;
field_offset = p - base_ptr;
- if (!syncing || (field_offset & 0x1ff)) {
+ if (field_offset & 0x1ff) {
op_len = be32_to_cpu(ophead->oh_len);
} else {
idx = BTOBBT((uintptr_t)&ophead->oh_len -
@@ -4033,7 +3923,7 @@ xfs_log_force_umount(
* avoid races.
*/
wake_up_all(&log->l_cilp->xc_commit_wait);
- xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
+ xlog_state_do_callback(log, true, NULL);
#ifdef XFSERRORDEBUG
{
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 73a64bf32f6f..84e06805160f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -6,6 +6,8 @@
#ifndef __XFS_LOG_H__
#define __XFS_LOG_H__
+struct xfs_cil_ctx;
+
struct xfs_log_vec {
struct xfs_log_vec *lv_next; /* next lv in build list */
int lv_niovecs; /* number of iovecs in lv */
@@ -72,16 +74,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
}
/*
- * Structure used to pass callback function and the function's argument
- * to the log manager.
- */
-typedef struct xfs_log_callback {
- struct xfs_log_callback *cb_next;
- void (*cb_func)(void *, int);
- void *cb_arg;
-} xfs_log_callback_t;
-
-/*
* By comparing each component, we don't have to worry about extra
* endian issues in treating two 32 bit numbers as one 64 bit number
*/
@@ -125,12 +117,10 @@ int xfs_log_mount(struct xfs_mount *mp,
xfs_daddr_t start_block,
int num_bblocks);
int xfs_log_mount_finish(struct xfs_mount *mp);
-int xfs_log_mount_cancel(struct xfs_mount *);
+void xfs_log_mount_cancel(struct xfs_mount *);
xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
void xfs_log_space_wake(struct xfs_mount *mp);
-int xfs_log_notify(struct xlog_in_core *iclog,
- struct xfs_log_callback *callback_entry);
int xfs_log_release_iclog(struct xfs_mount *mp,
struct xlog_in_core *iclog);
int xfs_log_reserve(struct xfs_mount *mp,
@@ -148,6 +138,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket);
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, bool regrant);
+void xlog_cil_process_committed(struct list_head *list, bool aborted);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 5e595948bc5a..fa5602d0fd7f 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -10,10 +10,7 @@
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_error.h"
-#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
-#include "xfs_discard.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
@@ -246,7 +243,8 @@ xfs_cil_prepare_item(
* shadow buffer, so update the the pointer to it appropriately.
*/
if (!old_lv) {
- lv->lv_item->li_ops->iop_pin(lv->lv_item);
+ if (lv->lv_item->li_ops->iop_pin)
+ lv->lv_item->li_ops->iop_pin(lv->lv_item);
lv->lv_item->li_lv_shadow = NULL;
} else if (old_lv != lv) {
ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
@@ -576,10 +574,9 @@ xlog_discard_busy_extents(
*/
static void
xlog_cil_committed(
- void *args,
- int abort)
+ struct xfs_cil_ctx *ctx,
+ bool abort)
{
- struct xfs_cil_ctx *ctx = args;
struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
/*
@@ -614,6 +611,20 @@ xlog_cil_committed(
kmem_free(ctx);
}
+void
+xlog_cil_process_committed(
+ struct list_head *list,
+ bool aborted)
+{
+ struct xfs_cil_ctx *ctx;
+
+ while ((ctx = list_first_entry_or_null(list,
+ struct xfs_cil_ctx, iclog_entry))) {
+ list_del(&ctx->iclog_entry);
+ xlog_cil_committed(ctx, aborted);
+ }
+}
+
/*
* Push the Committed Item List to the log. If @push_seq flag is zero, then it
* is a background flush and so we can chose to ignore it. Otherwise, if the
@@ -835,12 +846,15 @@ restart:
if (commit_lsn == -1)
goto out_abort;
- /* attach all the transactions w/ busy extents to iclog */
- ctx->log_cb.cb_func = xlog_cil_committed;
- ctx->log_cb.cb_arg = ctx;
- error = xfs_log_notify(commit_iclog, &ctx->log_cb);
- if (error)
+ spin_lock(&commit_iclog->ic_callback_lock);
+ if (commit_iclog->ic_state & XLOG_STATE_IOERROR) {
+ spin_unlock(&commit_iclog->ic_callback_lock);
goto out_abort;
+ }
+ ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
+ commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
+ list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
+ spin_unlock(&commit_iclog->ic_callback_lock);
/*
* now the checkpoint commit is complete and we've attached the
@@ -864,7 +878,7 @@ out_skip:
out_abort_free_ticket:
xfs_log_ticket_put(tic);
out_abort:
- xlog_cil_committed(ctx, XFS_LI_ABORTED);
+ xlog_cil_committed(ctx, true);
return -EIO;
}
@@ -984,6 +998,7 @@ xfs_log_commit_cil(
{
struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
+ struct xfs_log_item *lip, *next;
xfs_lsn_t xc_commit_lsn;
/*
@@ -1008,7 +1023,7 @@ xfs_log_commit_cil(
/*
* Once all the items of the transaction have been copied to the CIL,
- * the items can be unlocked and freed.
+ * the items can be unlocked and possibly freed.
*
* This needs to be done before we drop the CIL context lock because we
* have to update state in the log items and unlock them before they go
@@ -1017,8 +1032,12 @@ xfs_log_commit_cil(
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, xc_commit_lsn, false);
-
+ trace_xfs_trans_commit_items(tp, _RET_IP_);
+ list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+ xfs_trans_del_item(lip);
+ if (lip->li_ops->iop_committing)
+ lip->li_ops->iop_committing(lip, xc_commit_lsn);
+ }
xlog_cil_push_background(log);
up_read(&cil->xc_ctx_lock);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index b5f82cb36202..b880c23cb6e4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -10,7 +10,6 @@ struct xfs_buf;
struct xlog;
struct xlog_ticket;
struct xfs_mount;
-struct xfs_log_callback;
/*
* Flags for log structure
@@ -50,7 +49,6 @@ static inline uint xlog_get_client_id(__be32 i)
#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
-#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
@@ -179,11 +177,10 @@ typedef struct xlog_ticket {
* the iclog.
* - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
* - ic_next is the pointer to the next iclog in the ring.
- * - ic_bp is a pointer to the buffer used to write this incore log to disk.
* - ic_log is a pointer back to the global log structure.
- * - ic_callback is a linked list of callback function/argument pairs to be
- * called after an iclog finishes writing.
- * - ic_size is the full size of the header plus data.
+ * - ic_size is the full size of the log buffer, minus the cycle headers.
+ * - ic_io_size is the size of the currently pending log buffer write, which
+ * might be smaller than ic_size
* - ic_offset is the current number of bytes written to in this iclog.
* - ic_refcnt is bumped when someone is writing to the log.
* - ic_state is the state of the iclog.
@@ -193,7 +190,7 @@ typedef struct xlog_ticket {
* structure cacheline aligned. The following fields can be contended on
* by independent processes:
*
- * - ic_callback_*
+ * - ic_callbacks
* - ic_refcnt
* - fields protected by the global l_icloglock
*
@@ -206,23 +203,28 @@ typedef struct xlog_in_core {
wait_queue_head_t ic_write_wait;
struct xlog_in_core *ic_next;
struct xlog_in_core *ic_prev;
- struct xfs_buf *ic_bp;
struct xlog *ic_log;
- int ic_size;
- int ic_offset;
- int ic_bwritecnt;
+ u32 ic_size;
+ u32 ic_io_size;
+ u32 ic_offset;
unsigned short ic_state;
char *ic_datap; /* pointer to iclog data */
/* Callback structures need their own cacheline */
spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
- struct xfs_log_callback *ic_callback;
- struct xfs_log_callback **ic_callback_tail;
+ struct list_head ic_callbacks;
/* reference counts need their own cacheline */
atomic_t ic_refcnt ____cacheline_aligned_in_smp;
xlog_in_core_2_t *ic_data;
#define ic_header ic_data->hic_header
+#ifdef DEBUG
+ bool ic_fail_crc : 1;
+#endif
+ struct semaphore ic_sema;
+ struct work_struct ic_end_io_work;
+ struct bio ic_bio;
+ struct bio_vec ic_bvec[];
} xlog_in_core_t;
/*
@@ -243,7 +245,7 @@ struct xfs_cil_ctx {
int space_used; /* aggregate size of regions */
struct list_head busy_extents; /* busy extents in chkpt */
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
- struct xfs_log_callback log_cb; /* completion callback hook. */
+ struct list_head iclog_entry;
struct list_head committing; /* ctx committing list */
struct work_struct discard_endio_work;
};
@@ -350,9 +352,8 @@ struct xlog {
struct xfs_mount *l_mp; /* mount point */
struct xfs_ail *l_ailp; /* AIL log is working with */
struct xfs_cil *l_cilp; /* CIL log is working with */
- struct xfs_buf *l_xbuf; /* extra buffer for log
- * wrapping */
struct xfs_buftarg *l_targ; /* buftarg of log */
+ struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */
struct delayed_work l_work; /* background flush work */
uint l_flags;
uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
@@ -361,7 +362,6 @@ struct xlog {
int l_iclog_heads; /* # of iclog header sectors */
uint l_sectBBsize; /* sector size in BBs (2^n) */
int l_iclog_size; /* size of log in bytes */
- int l_iclog_size_log; /* log power size of log */
int l_iclog_bufs; /* number of iclog buffers */
xfs_daddr_t l_logBBstart; /* start block of log */
int l_logsize; /* size of log in bytes */
@@ -418,7 +418,7 @@ xlog_recover(
extern int
xlog_recover_finish(
struct xlog *log);
-extern int
+extern void
xlog_recover_cancel(struct xlog *);
extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9329f5adbfbe..13d1d3e95b88 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -13,8 +13,6 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_log.h"
@@ -26,7 +24,6 @@
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_btree.h"
@@ -79,7 +76,7 @@ struct xfs_buf_cancel {
* are valid, false otherwise.
*/
static inline bool
-xlog_verify_bp(
+xlog_verify_bno(
struct xlog *log,
xfs_daddr_t blk_no,
int bbcount)
@@ -92,22 +89,19 @@ xlog_verify_bp(
}
/*
- * Allocate a buffer to hold log data. The buffer needs to be able
- * to map to a range of nbblks basic blocks at any valid (basic
- * block) offset within the log.
+ * Allocate a buffer to hold log data. The buffer needs to be able to map to
+ * a range of nbblks basic blocks at any valid offset within the log.
*/
-STATIC xfs_buf_t *
-xlog_get_bp(
+static char *
+xlog_alloc_buffer(
struct xlog *log,
int nbblks)
{
- struct xfs_buf *bp;
-
/*
* Pass log block 0 since we don't have an addr yet, buffer will be
* verified on read.
*/
- if (!xlog_verify_bp(log, 0, nbblks)) {
+ if (!xlog_verify_bno(log, 0, nbblks)) {
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
@@ -115,69 +109,48 @@ xlog_get_bp(
}
/*
- * We do log I/O in units of log sectors (a power-of-2
- * multiple of the basic block size), so we round up the
- * requested size to accommodate the basic blocks required
- * for complete log sectors.
+ * We do log I/O in units of log sectors (a power-of-2 multiple of the
+ * basic block size), so we round up the requested size to accommodate
+ * the basic blocks required for complete log sectors.
*
- * In addition, the buffer may be used for a non-sector-
- * aligned block offset, in which case an I/O of the
- * requested size could extend beyond the end of the
- * buffer. If the requested size is only 1 basic block it
- * will never straddle a sector boundary, so this won't be
- * an issue. Nor will this be a problem if the log I/O is
- * done in basic blocks (sector size 1). But otherwise we
- * extend the buffer by one extra log sector to ensure
- * there's space to accommodate this possibility.
+ * In addition, the buffer may be used for a non-sector-aligned block
+ * offset, in which case an I/O of the requested size could extend
+ * beyond the end of the buffer. If the requested size is only 1 basic
+ * block it will never straddle a sector boundary, so this won't be an
+ * issue. Nor will this be a problem if the log I/O is done in basic
+ * blocks (sector size 1). But otherwise we extend the buffer by one
+ * extra log sector to ensure there's space to accommodate this
+ * possibility.
*/
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
-
- bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0);
- if (bp)
- xfs_buf_unlock(bp);
- return bp;
-}
-
-STATIC void
-xlog_put_bp(
- xfs_buf_t *bp)
-{
- xfs_buf_free(bp);
+ return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
}
/*
* Return the address of the start of the given block number's data
* in a log buffer. The buffer covers a log sector-aligned region.
*/
-STATIC char *
+static inline unsigned int
xlog_align(
struct xlog *log,
- xfs_daddr_t blk_no,
- int nbblks,
- struct xfs_buf *bp)
+ xfs_daddr_t blk_no)
{
- xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
-
- ASSERT(offset + nbblks <= bp->b_length);
- return bp->b_addr + BBTOB(offset);
+ return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1));
}
-
-/*
- * nbblks should be uint, but oh well. Just want to catch that 32-bit length.
- */
-STATIC int
-xlog_bread_noalign(
- struct xlog *log,
- xfs_daddr_t blk_no,
- int nbblks,
- struct xfs_buf *bp)
+static int
+xlog_do_io(
+ struct xlog *log,
+ xfs_daddr_t blk_no,
+ unsigned int nbblks,
+ char *data,
+ unsigned int op)
{
- int error;
+ int error;
- if (!xlog_verify_bp(log, blk_no, nbblks)) {
+ if (!xlog_verify_bno(log, blk_no, nbblks)) {
xfs_warn(log->l_mp,
"Invalid log block/length (0x%llx, 0x%x) for buffer",
blk_no, nbblks);
@@ -187,107 +160,53 @@ xlog_bread_noalign(
blk_no = round_down(blk_no, log->l_sectBBsize);
nbblks = round_up(nbblks, log->l_sectBBsize);
-
ASSERT(nbblks > 0);
- ASSERT(nbblks <= bp->b_length);
-
- XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
- bp->b_flags |= XBF_READ;
- bp->b_io_length = nbblks;
- bp->b_error = 0;
- error = xfs_buf_submit(bp);
- if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
- xfs_buf_ioerror_alert(bp, __func__);
+ error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
+ BBTOB(nbblks), data, op);
+ if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) {
+ xfs_alert(log->l_mp,
+ "log recovery %s I/O error at daddr 0x%llx len %d error %d",
+ op == REQ_OP_WRITE ? "write" : "read",
+ blk_no, nbblks, error);
+ }
return error;
}
STATIC int
-xlog_bread(
+xlog_bread_noalign(
struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
- struct xfs_buf *bp,
- char **offset)
+ char *data)
{
- int error;
-
- error = xlog_bread_noalign(log, blk_no, nbblks, bp);
- if (error)
- return error;
-
- *offset = xlog_align(log, blk_no, nbblks, bp);
- return 0;
+ return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
}
-/*
- * Read at an offset into the buffer. Returns with the buffer in it's original
- * state regardless of the result of the read.
- */
STATIC int
-xlog_bread_offset(
+xlog_bread(
struct xlog *log,
- xfs_daddr_t blk_no, /* block to read from */
- int nbblks, /* blocks to read */
- struct xfs_buf *bp,
- char *offset)
+ xfs_daddr_t blk_no,
+ int nbblks,
+ char *data,
+ char **offset)
{
- char *orig_offset = bp->b_addr;
- int orig_len = BBTOB(bp->b_length);
- int error, error2;
-
- error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
- if (error)
- return error;
-
- error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+ int error;
- /* must reset buffer pointer even on error */
- error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
- if (error)
- return error;
- return error2;
+ error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
+ if (!error)
+ *offset = data + xlog_align(log, blk_no);
+ return error;
}
-/*
- * Write out the buffer at the given block for the given number of blocks.
- * The buffer is kept locked across the write and is returned locked.
- * This can only be used for synchronous log writes.
- */
STATIC int
xlog_bwrite(
struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
- struct xfs_buf *bp)
+ char *data)
{
- int error;
-
- if (!xlog_verify_bp(log, blk_no, nbblks)) {
- xfs_warn(log->l_mp,
- "Invalid log block/length (0x%llx, 0x%x) for buffer",
- blk_no, nbblks);
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
- return -EFSCORRUPTED;
- }
-
- blk_no = round_down(blk_no, log->l_sectBBsize);
- nbblks = round_up(nbblks, log->l_sectBBsize);
-
- ASSERT(nbblks > 0);
- ASSERT(nbblks <= bp->b_length);
-
- XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
- xfs_buf_hold(bp);
- xfs_buf_lock(bp);
- bp->b_io_length = nbblks;
- bp->b_error = 0;
-
- error = xfs_bwrite(bp);
- if (error)
- xfs_buf_ioerror_alert(bp, __func__);
- xfs_buf_relse(bp);
- return error;
+ return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE);
}
#ifdef DEBUG
@@ -377,10 +296,9 @@ xlog_recover_iodone(
* We're not going to bother about retrying
* this during recovery. One strike!
*/
- if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+ if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
xfs_buf_ioerror_alert(bp, __func__);
- xfs_force_shutdown(bp->b_target->bt_mount,
- SHUTDOWN_META_IO_ERROR);
+ xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
}
}
@@ -405,7 +323,7 @@ xlog_recover_iodone(
STATIC int
xlog_find_cycle_start(
struct xlog *log,
- struct xfs_buf *bp,
+ char *buffer,
xfs_daddr_t first_blk,
xfs_daddr_t *last_blk,
uint cycle)
@@ -419,7 +337,7 @@ xlog_find_cycle_start(
end_blk = *last_blk;
mid_blk = BLK_AVG(first_blk, end_blk);
while (mid_blk != first_blk && mid_blk != end_blk) {
- error = xlog_bread(log, mid_blk, 1, bp, &offset);
+ error = xlog_bread(log, mid_blk, 1, buffer, &offset);
if (error)
return error;
mid_cycle = xlog_get_cycle(offset);
@@ -455,7 +373,7 @@ xlog_find_verify_cycle(
{
xfs_daddr_t i, j;
uint cycle;
- xfs_buf_t *bp;
+ char *buffer;
xfs_daddr_t bufblks;
char *buf = NULL;
int error = 0;
@@ -469,7 +387,7 @@ xlog_find_verify_cycle(
bufblks = 1 << ffs(nbblks);
while (bufblks > log->l_logBBsize)
bufblks >>= 1;
- while (!(bp = xlog_get_bp(log, bufblks))) {
+ while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
bufblks >>= 1;
if (bufblks < log->l_sectBBsize)
return -ENOMEM;
@@ -480,7 +398,7 @@ xlog_find_verify_cycle(
bcount = min(bufblks, (start_blk + nbblks - i));
- error = xlog_bread(log, i, bcount, bp, &buf);
+ error = xlog_bread(log, i, bcount, buffer, &buf);
if (error)
goto out;
@@ -498,7 +416,7 @@ xlog_find_verify_cycle(
*new_blk = -1;
out:
- xlog_put_bp(bp);
+ kmem_free(buffer);
return error;
}
@@ -522,7 +440,7 @@ xlog_find_verify_log_record(
int extra_bblks)
{
xfs_daddr_t i;
- xfs_buf_t *bp;
+ char *buffer;
char *offset = NULL;
xlog_rec_header_t *head = NULL;
int error = 0;
@@ -532,12 +450,14 @@ xlog_find_verify_log_record(
ASSERT(start_blk != 0 || *last_blk != start_blk);
- if (!(bp = xlog_get_bp(log, num_blks))) {
- if (!(bp = xlog_get_bp(log, 1)))
+ buffer = xlog_alloc_buffer(log, num_blks);
+ if (!buffer) {
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
smallmem = 1;
} else {
- error = xlog_bread(log, start_blk, num_blks, bp, &offset);
+ error = xlog_bread(log, start_blk, num_blks, buffer, &offset);
if (error)
goto out;
offset += ((num_blks - 1) << BBSHIFT);
@@ -554,7 +474,7 @@ xlog_find_verify_log_record(
}
if (smallmem) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out;
}
@@ -607,7 +527,7 @@ xlog_find_verify_log_record(
*last_blk = i;
out:
- xlog_put_bp(bp);
+ kmem_free(buffer);
return error;
}
@@ -629,7 +549,7 @@ xlog_find_head(
struct xlog *log,
xfs_daddr_t *return_head_blk)
{
- xfs_buf_t *bp;
+ char *buffer;
char *offset;
xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
int num_scan_bblks;
@@ -659,20 +579,20 @@ xlog_find_head(
}
first_blk = 0; /* get cycle # of 1st block */
- bp = xlog_get_bp(log, 1);
- if (!bp)
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
- error = xlog_bread(log, 0, 1, bp, &offset);
+ error = xlog_bread(log, 0, 1, buffer, &offset);
if (error)
- goto bp_err;
+ goto out_free_buffer;
first_half_cycle = xlog_get_cycle(offset);
last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
- error = xlog_bread(log, last_blk, 1, bp, &offset);
+ error = xlog_bread(log, last_blk, 1, buffer, &offset);
if (error)
- goto bp_err;
+ goto out_free_buffer;
last_half_cycle = xlog_get_cycle(offset);
ASSERT(last_half_cycle != 0);
@@ -740,9 +660,10 @@ xlog_find_head(
* ^ we want to locate this spot
*/
stop_on_cycle = last_half_cycle;
- if ((error = xlog_find_cycle_start(log, bp, first_blk,
- &head_blk, last_half_cycle)))
- goto bp_err;
+ error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk,
+ last_half_cycle);
+ if (error)
+ goto out_free_buffer;
}
/*
@@ -762,7 +683,7 @@ xlog_find_head(
if ((error = xlog_find_verify_cycle(log,
start_blk, num_scan_bblks,
stop_on_cycle, &new_blk)))
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != -1)
head_blk = new_blk;
} else { /* need to read 2 parts of log */
@@ -799,7 +720,7 @@ xlog_find_head(
if ((error = xlog_find_verify_cycle(log, start_blk,
num_scan_bblks - (int)head_blk,
(stop_on_cycle - 1), &new_blk)))
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != -1) {
head_blk = new_blk;
goto validate_head;
@@ -815,7 +736,7 @@ xlog_find_head(
if ((error = xlog_find_verify_cycle(log,
start_blk, (int)head_blk,
stop_on_cycle, &new_blk)))
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != -1)
head_blk = new_blk;
}
@@ -834,13 +755,13 @@ validate_head:
if (error == 1)
error = -EIO;
if (error)
- goto bp_err;
+ goto out_free_buffer;
} else {
start_blk = 0;
ASSERT(head_blk <= INT_MAX);
error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
if (error < 0)
- goto bp_err;
+ goto out_free_buffer;
if (error == 1) {
/* We hit the beginning of the log during our search */
start_blk = log_bbnum - (num_scan_bblks - head_blk);
@@ -853,14 +774,14 @@ validate_head:
if (error == 1)
error = -EIO;
if (error)
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != log_bbnum)
head_blk = new_blk;
} else if (error)
- goto bp_err;
+ goto out_free_buffer;
}
- xlog_put_bp(bp);
+ kmem_free(buffer);
if (head_blk == log_bbnum)
*return_head_blk = 0;
else
@@ -873,9 +794,8 @@ validate_head:
*/
return 0;
- bp_err:
- xlog_put_bp(bp);
-
+out_free_buffer:
+ kmem_free(buffer);
if (error)
xfs_warn(log->l_mp, "failed to find log head");
return error;
@@ -895,7 +815,7 @@ xlog_rseek_logrec_hdr(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
int count,
- struct xfs_buf *bp,
+ char *buffer,
xfs_daddr_t *rblk,
struct xlog_rec_header **rhead,
bool *wrapped)
@@ -914,7 +834,7 @@ xlog_rseek_logrec_hdr(
*/
end_blk = head_blk > tail_blk ? tail_blk : 0;
for (i = (int) head_blk - 1; i >= end_blk; i--) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out_error;
@@ -933,7 +853,7 @@ xlog_rseek_logrec_hdr(
*/
if (tail_blk >= head_blk && found != count) {
for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out_error;
@@ -969,7 +889,7 @@ xlog_seek_logrec_hdr(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
int count,
- struct xfs_buf *bp,
+ char *buffer,
xfs_daddr_t *rblk,
struct xlog_rec_header **rhead,
bool *wrapped)
@@ -988,7 +908,7 @@ xlog_seek_logrec_hdr(
*/
end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
for (i = (int) tail_blk; i <= end_blk; i++) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out_error;
@@ -1006,7 +926,7 @@ xlog_seek_logrec_hdr(
*/
if (tail_blk > head_blk && found != count) {
for (i = 0; i < (int) head_blk; i++) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out_error;
@@ -1069,22 +989,22 @@ xlog_verify_tail(
int hsize)
{
struct xlog_rec_header *thead;
- struct xfs_buf *bp;
+ char *buffer;
xfs_daddr_t first_bad;
int error = 0;
bool wrapped;
xfs_daddr_t tmp_tail;
xfs_daddr_t orig_tail = *tail_blk;
- bp = xlog_get_bp(log, 1);
- if (!bp)
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
/*
* Make sure the tail points to a record (returns positive count on
* success).
*/
- error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp,
+ error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer,
&tmp_tail, &thead, &wrapped);
if (error < 0)
goto out;
@@ -1113,8 +1033,8 @@ xlog_verify_tail(
break;
/* skip to the next record; returns positive count on success */
- error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp,
- &tmp_tail, &thead, &wrapped);
+ error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2,
+ buffer, &tmp_tail, &thead, &wrapped);
if (error < 0)
goto out;
@@ -1129,7 +1049,7 @@ xlog_verify_tail(
"Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
orig_tail, *tail_blk);
out:
- xlog_put_bp(bp);
+ kmem_free(buffer);
return error;
}
@@ -1151,13 +1071,13 @@ xlog_verify_head(
struct xlog *log,
xfs_daddr_t *head_blk, /* in/out: unverified head */
xfs_daddr_t *tail_blk, /* out: tail block */
- struct xfs_buf *bp,
+ char *buffer,
xfs_daddr_t *rhead_blk, /* start blk of last record */
struct xlog_rec_header **rhead, /* ptr to last record */
bool *wrapped) /* last rec. wraps phys. log */
{
struct xlog_rec_header *tmp_rhead;
- struct xfs_buf *tmp_bp;
+ char *tmp_buffer;
xfs_daddr_t first_bad;
xfs_daddr_t tmp_rhead_blk;
int found;
@@ -1168,15 +1088,15 @@ xlog_verify_head(
* Check the head of the log for torn writes. Search backwards from the
* head until we hit the tail or the maximum number of log record I/Os
* that could have been in flight at one time. Use a temporary buffer so
- * we don't trash the rhead/bp pointers from the caller.
+ * we don't trash the rhead/buffer pointers from the caller.
*/
- tmp_bp = xlog_get_bp(log, 1);
- if (!tmp_bp)
+ tmp_buffer = xlog_alloc_buffer(log, 1);
+ if (!tmp_buffer)
return -ENOMEM;
error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
- XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
- &tmp_rhead, &tmp_wrapped);
- xlog_put_bp(tmp_bp);
+ XLOG_MAX_ICLOGS, tmp_buffer,
+ &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped);
+ kmem_free(tmp_buffer);
if (error < 0)
return error;
@@ -1205,8 +1125,8 @@ xlog_verify_head(
* (i.e., the records with invalid CRC) if the cycle number
* matches the the current cycle.
*/
- found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
- rhead_blk, rhead, wrapped);
+ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1,
+ buffer, rhead_blk, rhead, wrapped);
if (found < 0)
return found;
if (found == 0) /* XXX: right thing to do here? */
@@ -1266,7 +1186,7 @@ xlog_check_unmount_rec(
xfs_daddr_t *tail_blk,
struct xlog_rec_header *rhead,
xfs_daddr_t rhead_blk,
- struct xfs_buf *bp,
+ char *buffer,
bool *clean)
{
struct xlog_op_header *op_head;
@@ -1309,7 +1229,7 @@ xlog_check_unmount_rec(
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks);
- error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+ error = xlog_bread(log, umount_data_blk, 1, buffer, &offset);
if (error)
return error;
@@ -1388,7 +1308,7 @@ xlog_find_tail(
{
xlog_rec_header_t *rhead;
char *offset = NULL;
- xfs_buf_t *bp;
+ char *buffer;
int error;
xfs_daddr_t rhead_blk;
xfs_lsn_t tail_lsn;
@@ -1402,11 +1322,11 @@ xlog_find_tail(
return error;
ASSERT(*head_blk < INT_MAX);
- bp = xlog_get_bp(log, 1);
- if (!bp)
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
if (*head_blk == 0) { /* special case */
- error = xlog_bread(log, 0, 1, bp, &offset);
+ error = xlog_bread(log, 0, 1, buffer, &offset);
if (error)
goto done;
@@ -1422,7 +1342,7 @@ xlog_find_tail(
* block. This wraps all the way back around to the head so something is
* seriously wrong if we can't find it.
*/
- error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+ error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer,
&rhead_blk, &rhead, &wrapped);
if (error < 0)
return error;
@@ -1443,7 +1363,7 @@ xlog_find_tail(
* state to determine whether recovery is necessary.
*/
error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
- rhead_blk, bp, &clean);
+ rhead_blk, buffer, &clean);
if (error)
goto done;
@@ -1460,7 +1380,7 @@ xlog_find_tail(
if (!clean) {
xfs_daddr_t orig_head = *head_blk;
- error = xlog_verify_head(log, head_blk, tail_blk, bp,
+ error = xlog_verify_head(log, head_blk, tail_blk, buffer,
&rhead_blk, &rhead, &wrapped);
if (error)
goto done;
@@ -1471,7 +1391,7 @@ xlog_find_tail(
wrapped);
tail_lsn = atomic64_read(&log->l_tail_lsn);
error = xlog_check_unmount_rec(log, head_blk, tail_blk,
- rhead, rhead_blk, bp,
+ rhead, rhead_blk, buffer,
&clean);
if (error)
goto done;
@@ -1505,11 +1425,11 @@ xlog_find_tail(
* But... if the -device- itself is readonly, just skip this.
* We can't recover this device anyway, so it won't matter.
*/
- if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
+ if (!xfs_readonly_buftarg(log->l_targ))
error = xlog_clear_stale_blocks(log, tail_lsn);
done:
- xlog_put_bp(bp);
+ kmem_free(buffer);
if (error)
xfs_warn(log->l_mp, "failed to locate log tail");
@@ -1537,7 +1457,7 @@ xlog_find_zeroed(
struct xlog *log,
xfs_daddr_t *blk_no)
{
- xfs_buf_t *bp;
+ char *buffer;
char *offset;
uint first_cycle, last_cycle;
xfs_daddr_t new_blk, last_blk, start_blk;
@@ -1547,35 +1467,36 @@ xlog_find_zeroed(
*blk_no = 0;
/* check totally zeroed log */
- bp = xlog_get_bp(log, 1);
- if (!bp)
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
- error = xlog_bread(log, 0, 1, bp, &offset);
+ error = xlog_bread(log, 0, 1, buffer, &offset);
if (error)
- goto bp_err;
+ goto out_free_buffer;
first_cycle = xlog_get_cycle(offset);
if (first_cycle == 0) { /* completely zeroed log */
*blk_no = 0;
- xlog_put_bp(bp);
+ kmem_free(buffer);
return 1;
}
/* check partially zeroed log */
- error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
+ error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset);
if (error)
- goto bp_err;
+ goto out_free_buffer;
last_cycle = xlog_get_cycle(offset);
if (last_cycle != 0) { /* log completely written to */
- xlog_put_bp(bp);
+ kmem_free(buffer);
return 0;
}
/* we have a partially zeroed log */
last_blk = log_bbnum-1;
- if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
- goto bp_err;
+ error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0);
+ if (error)
+ goto out_free_buffer;
/*
* Validate the answer. Because there is no way to guarantee that
@@ -1598,7 +1519,7 @@ xlog_find_zeroed(
*/
if ((error = xlog_find_verify_cycle(log, start_blk,
(int)num_scan_bblks, 0, &new_blk)))
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != -1)
last_blk = new_blk;
@@ -1610,11 +1531,11 @@ xlog_find_zeroed(
if (error == 1)
error = -EIO;
if (error)
- goto bp_err;
+ goto out_free_buffer;
*blk_no = last_blk;
-bp_err:
- xlog_put_bp(bp);
+out_free_buffer:
+ kmem_free(buffer);
if (error)
return error;
return 1;
@@ -1657,7 +1578,7 @@ xlog_write_log_records(
int tail_block)
{
char *offset;
- xfs_buf_t *bp;
+ char *buffer;
int balign, ealign;
int sectbb = log->l_sectBBsize;
int end_block = start_block + blocks;
@@ -1674,7 +1595,7 @@ xlog_write_log_records(
bufblks = 1 << ffs(blocks);
while (bufblks > log->l_logBBsize)
bufblks >>= 1;
- while (!(bp = xlog_get_bp(log, bufblks))) {
+ while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
bufblks >>= 1;
if (bufblks < sectbb)
return -ENOMEM;
@@ -1686,9 +1607,9 @@ xlog_write_log_records(
*/
balign = round_down(start_block, sectbb);
if (balign != start_block) {
- error = xlog_bread_noalign(log, start_block, 1, bp);
+ error = xlog_bread_noalign(log, start_block, 1, buffer);
if (error)
- goto out_put_bp;
+ goto out_free_buffer;
j = start_block - balign;
}
@@ -1705,29 +1626,28 @@ xlog_write_log_records(
*/
ealign = round_down(end_block, sectbb);
if (j == 0 && (start_block + endcount > ealign)) {
- offset = bp->b_addr + BBTOB(ealign - start_block);
- error = xlog_bread_offset(log, ealign, sectbb,
- bp, offset);
+ error = xlog_bread_noalign(log, ealign, sectbb,
+ buffer + BBTOB(ealign - start_block));
if (error)
break;
}
- offset = xlog_align(log, start_block, endcount, bp);
+ offset = buffer + xlog_align(log, start_block);
for (; j < endcount; j++) {
xlog_add_record(log, offset, cycle, i+j,
tail_cycle, tail_block);
offset += BBSIZE;
}
- error = xlog_bwrite(log, start_block, endcount, bp);
+ error = xlog_bwrite(log, start_block, endcount, buffer);
if (error)
break;
start_block += endcount;
j = 0;
}
- out_put_bp:
- xlog_put_bp(bp);
+out_free_buffer:
+ kmem_free(buffer);
return error;
}
@@ -2162,7 +2082,7 @@ xlog_recover_do_inode_buffer(
if (xfs_sb_version_hascrc(&mp->m_sb))
bp->b_ops = &xfs_inode_buf_ops;
- inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
+ inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
for (i = 0; i < inodes_per_buf; i++) {
next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
offsetof(xfs_dinode_t, di_next_unlinked);
@@ -2204,8 +2124,7 @@ xlog_recover_do_inode_buffer(
ASSERT(item->ri_buf[item_index].i_addr != NULL);
ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
- ASSERT((reg_buf_offset + reg_buf_bytes) <=
- BBTOB(bp->b_io_length));
+ ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
/*
* The current logged region contains a copy of the
@@ -2670,7 +2589,7 @@ xlog_recover_do_reg_buffer(
ASSERT(nbits > 0);
ASSERT(item->ri_buf[i].i_addr != NULL);
ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
- ASSERT(BBTOB(bp->b_io_length) >=
+ ASSERT(BBTOB(bp->b_length) >=
((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
/*
@@ -2882,23 +2801,22 @@ xlog_recover_buffer_pass2(
*
* Also make sure that only inode buffers with good sizes stay in
* the buffer cache. The kernel moves inodes in buffers of 1 block
- * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode
+ * or inode_cluster_size bytes, whichever is bigger. The inode
* buffers in the log can be a different size if the log was generated
* by an older kernel using unclustered inode buffers or a newer kernel
* running with a different inode cluster size. Regardless, if the
- * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size)
- * for *our* value of mp->m_inode_cluster_size, then we need to keep
+ * the inode buffer size isn't max(blocksize, inode_cluster_size)
+ * for *our* value of inode_cluster_size, then we need to keep
* the buffer out of the buffer cache so that the buffer won't
* overlap with future reads of those inodes.
*/
if (XFS_DINODE_MAGIC ==
be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
- (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize,
- (uint32_t)log->l_mp->m_inode_cluster_size))) {
+ (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
xfs_buf_stale(bp);
error = xfs_bwrite(bp);
} else {
- ASSERT(bp->b_target->bt_mount == mp);
+ ASSERT(bp->b_mount == mp);
bp->b_iodone = xlog_recover_iodone;
xfs_buf_delwri_queue(bp, buffer_list);
}
@@ -3260,7 +3178,7 @@ out_owner_change:
/* re-generate the checksum. */
xfs_dinode_calc_crc(log->l_mp, dip);
- ASSERT(bp->b_target->bt_mount == mp);
+ ASSERT(bp->b_mount == mp);
bp->b_iodone = xlog_recover_iodone;
xfs_buf_delwri_queue(bp, buffer_list);
@@ -3399,7 +3317,7 @@ xlog_recover_dquot_pass2(
}
ASSERT(dq_f->qlf_size == 2);
- ASSERT(bp->b_target->bt_mount == mp);
+ ASSERT(bp->b_mount == mp);
bp->b_iodone = xlog_recover_iodone;
xfs_buf_delwri_queue(bp, buffer_list);
@@ -3463,7 +3381,7 @@ xlog_recover_efd_pass2(
{
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
uint64_t efi_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3849,6 +3767,7 @@ xlog_recover_do_icreate_pass2(
{
struct xfs_mount *mp = log->l_mp;
struct xfs_icreate_log *icl;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
xfs_agnumber_t agno;
xfs_agblock_t agbno;
unsigned int count;
@@ -3898,10 +3817,10 @@ xlog_recover_do_icreate_pass2(
/*
* The inode chunk is either full or sparse and we only support
- * m_ialloc_min_blks sized sparse allocations at this time.
+ * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
*/
- if (length != mp->m_ialloc_blks &&
- length != mp->m_ialloc_min_blks) {
+ if (length != igeo->ialloc_blks &&
+ length != igeo->ialloc_min_blks) {
xfs_warn(log->l_mp,
"%s: unsupported chunk length", __FUNCTION__);
return -EINVAL;
@@ -3921,13 +3840,13 @@ xlog_recover_do_icreate_pass2(
* buffers for cancellation so we don't overwrite anything written after
* a cancellation.
*/
- bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
- nbufs = length / mp->m_blocks_per_cluster;
+ bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
+ nbufs = length / igeo->blocks_per_cluster;
for (i = 0, cancel_count = 0; i < nbufs; i++) {
xfs_daddr_t daddr;
daddr = XFS_AGB_TO_DADDR(mp, agno,
- agbno + i * mp->m_blocks_per_cluster);
+ agbno + i * igeo->blocks_per_cluster);
if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
cancel_count++;
}
@@ -4956,12 +4875,11 @@ out:
* A cancel occurs when the mount has failed and we're bailing out.
* Release all pending log intent items so they don't pin the AIL.
*/
-STATIC int
+STATIC void
xlog_recover_cancel_intents(
struct xlog *log)
{
struct xfs_log_item *lip;
- int error = 0;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp;
@@ -5001,7 +4919,6 @@ xlog_recover_cancel_intents(
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->ail_lock);
- return error;
}
/*
@@ -5307,7 +5224,7 @@ xlog_do_recovery_pass(
xfs_daddr_t blk_no, rblk_no;
xfs_daddr_t rhead_blk;
char *offset;
- xfs_buf_t *hbp, *dbp;
+ char *hbp, *dbp;
int error = 0, h_size, h_len;
int error2 = 0;
int bblks, split_bblks;
@@ -5332,7 +5249,7 @@ xlog_do_recovery_pass(
* iclog header and extract the header size from it. Get a
* new hbp that is the correct size.
*/
- hbp = xlog_get_bp(log, 1);
+ hbp = xlog_alloc_buffer(log, 1);
if (!hbp)
return -ENOMEM;
@@ -5374,23 +5291,23 @@ xlog_do_recovery_pass(
hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
if (h_size % XLOG_HEADER_CYCLE_SIZE)
hblks++;
- xlog_put_bp(hbp);
- hbp = xlog_get_bp(log, hblks);
+ kmem_free(hbp);
+ hbp = xlog_alloc_buffer(log, hblks);
} else {
hblks = 1;
}
} else {
ASSERT(log->l_sectBBsize == 1);
hblks = 1;
- hbp = xlog_get_bp(log, 1);
+ hbp = xlog_alloc_buffer(log, 1);
h_size = XLOG_BIG_RECORD_BSIZE;
}
if (!hbp)
return -ENOMEM;
- dbp = xlog_get_bp(log, BTOBB(h_size));
+ dbp = xlog_alloc_buffer(log, BTOBB(h_size));
if (!dbp) {
- xlog_put_bp(hbp);
+ kmem_free(hbp);
return -ENOMEM;
}
@@ -5405,7 +5322,7 @@ xlog_do_recovery_pass(
/*
* Check for header wrapping around physical end-of-log
*/
- offset = hbp->b_addr;
+ offset = hbp;
split_hblks = 0;
wrapped_hblks = 0;
if (blk_no + hblks <= log->l_logBBsize) {
@@ -5441,8 +5358,8 @@ xlog_do_recovery_pass(
* - order is important.
*/
wrapped_hblks = hblks - split_hblks;
- error = xlog_bread_offset(log, 0,
- wrapped_hblks, hbp,
+ error = xlog_bread_noalign(log, 0,
+ wrapped_hblks,
offset + BBTOB(split_hblks));
if (error)
goto bread_err2;
@@ -5473,7 +5390,7 @@ xlog_do_recovery_pass(
} else {
/* This log record is split across the
* physical end of log */
- offset = dbp->b_addr;
+ offset = dbp;
split_bblks = 0;
if (blk_no != log->l_logBBsize) {
/* some data is before the physical
@@ -5502,8 +5419,8 @@ xlog_do_recovery_pass(
* _first_, then the log start (LR header end)
* - order is important.
*/
- error = xlog_bread_offset(log, 0,
- bblks - split_bblks, dbp,
+ error = xlog_bread_noalign(log, 0,
+ bblks - split_bblks,
offset + BBTOB(split_bblks));
if (error)
goto bread_err2;
@@ -5551,9 +5468,9 @@ xlog_do_recovery_pass(
}
bread_err2:
- xlog_put_bp(dbp);
+ kmem_free(dbp);
bread_err1:
- xlog_put_bp(hbp);
+ kmem_free(hbp);
/*
* Submit buffers that have been added from the last record processed,
@@ -5687,7 +5604,7 @@ xlog_do_recover(
* Now that we've finished replaying all buffer and inode
* updates, re-read in the superblock and reverify it.
*/
- bp = xfs_getsb(mp, 0);
+ bp = xfs_getsb(mp);
bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
ASSERT(!(bp->b_flags & XBF_WRITE));
bp->b_flags |= XBF_READ;
@@ -5860,16 +5777,12 @@ xlog_recover_finish(
return 0;
}
-int
+void
xlog_recover_cancel(
struct xlog *log)
{
- int error = 0;
-
if (log->l_flags & XLOG_RECOVERY_NEEDED)
- error = xlog_recover_cancel_intents(log);
-
- return error;
+ xlog_recover_cancel_intents(log);
}
#if defined(DEBUG)
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 6b736ea58d35..9804efe525a9 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -6,8 +6,8 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_error.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6b2bfe81dc51..322da6909290 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -12,9 +12,6 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
@@ -27,7 +24,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
@@ -430,30 +426,6 @@ xfs_update_alignment(xfs_mount_t *mp)
}
/*
- * Set the maximum inode count for this filesystem
- */
-STATIC void
-xfs_set_maxicount(xfs_mount_t *mp)
-{
- xfs_sb_t *sbp = &(mp->m_sb);
- uint64_t icount;
-
- if (sbp->sb_imax_pct) {
- /*
- * Make sure the maximum inode count is a multiple
- * of the units we allocate inodes in.
- */
- icount = sbp->sb_dblocks * sbp->sb_imax_pct;
- do_div(icount, 100);
- do_div(icount, mp->m_ialloc_blks);
- mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
- sbp->sb_inopblog;
- } else {
- mp->m_maxicount = 0;
- }
-}
-
-/*
* Set the default minimum read and write sizes unless
* already specified in a mount option.
* We use smaller I/O sizes when the file system
@@ -509,29 +481,6 @@ xfs_set_low_space_thresholds(
}
}
-
-/*
- * Set whether we're using inode alignment.
- */
-STATIC void
-xfs_set_inoalignment(xfs_mount_t *mp)
-{
- if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
- mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
- else
- mp->m_inoalign_mask = 0;
- /*
- * If we are using stripe alignment, check whether
- * the stripe unit is a multiple of the inode alignment
- */
- if (mp->m_dalign && mp->m_inoalign_mask &&
- !(mp->m_dalign & mp->m_inoalign_mask))
- mp->m_sinoalign = mp->m_dalign;
- else
- mp->m_sinoalign = 0;
-}
-
/*
* Check that the data (and log if separate) is an ok size.
*/
@@ -683,6 +632,7 @@ xfs_mountfs(
{
struct xfs_sb *sbp = &(mp->m_sb);
struct xfs_inode *rip;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
uint64_t resblks;
uint quotamount = 0;
uint quotaflags = 0;
@@ -749,12 +699,10 @@ xfs_mountfs(
xfs_alloc_compute_maxlevels(mp);
xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
- xfs_ialloc_compute_maxlevels(mp);
+ xfs_ialloc_setup_geometry(mp);
xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
- xfs_set_maxicount(mp);
-
/* enable fail_at_unmount as default */
mp->m_fail_unmount = true;
@@ -788,50 +736,22 @@ xfs_mountfs(
xfs_set_low_space_thresholds(mp);
/*
- * Set the inode cluster size.
- * This may still be overridden by the file system
- * block size if it is larger than the chosen cluster size.
- *
- * For v5 filesystems, scale the cluster size with the inode size to
- * keep a constant ratio of inode per cluster buffer, but only if mkfs
- * has set the inode alignment value appropriately for larger cluster
- * sizes.
- */
- mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
- int new_size = mp->m_inode_cluster_size;
-
- new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
- if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
- mp->m_inode_cluster_size = new_size;
- }
- mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp);
- mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster);
- mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp);
- mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align);
-
- /*
* If enabled, sparse inode chunk alignment is expected to match the
* cluster size. Full inode chunk alignment must match the chunk size,
* but that is checked on sb read verification...
*/
if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
mp->m_sb.sb_spino_align !=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+ XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
xfs_warn(mp,
"Sparse inode block alignment (%u) must match cluster size (%llu).",
mp->m_sb.sb_spino_align,
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+ XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
error = -EINVAL;
goto out_remove_uuid;
}
/*
- * Set inode alignment fields
- */
- xfs_set_inoalignment(mp);
-
- /*
* Check that the data (and log if separate) is an ok size.
*/
error = xfs_check_sizes(mp);
@@ -1385,24 +1305,14 @@ xfs_mod_frextents(
* xfs_getsb() is called to obtain the buffer for the superblock.
* The buffer is returned locked and read in from disk.
* The buffer should be released with a call to xfs_brelse().
- *
- * If the flags parameter is BUF_TRYLOCK, then we'll only return
- * the superblock buffer if it can be locked without sleeping.
- * If it can't then we'll return NULL.
*/
struct xfs_buf *
xfs_getsb(
- struct xfs_mount *mp,
- int flags)
+ struct xfs_mount *mp)
{
struct xfs_buf *bp = mp->m_sb_bp;
- if (!xfs_buf_trylock(bp)) {
- if (flags & XBF_TRYLOCK)
- return NULL;
- xfs_buf_lock(bp);
- }
-
+ xfs_buf_lock(bp);
xfs_buf_hold(bp);
ASSERT(bp->b_flags & XBF_DONE);
return bp;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index c81a5cd7c228..4adb6837439a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -105,6 +105,7 @@ typedef struct xfs_mount {
struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
struct xlog *m_log; /* log specific stuff */
+ struct xfs_ino_geometry m_ino_geo; /* inode geometry */
int m_logbufs; /* number of log buffers */
int m_logbsize; /* size of each log buffer */
uint m_rsumlevels; /* rt summary levels */
@@ -126,12 +127,6 @@ typedef struct xfs_mount {
uint8_t m_blkbit_log; /* blocklog + NBBY */
uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
uint8_t m_agno_log; /* log #ag's */
- uint8_t m_agino_log; /* #bits for agino in inum */
- uint m_inode_cluster_size;/* min inode buf size */
- unsigned int m_inodes_per_cluster;
- unsigned int m_blocks_per_cluster;
- unsigned int m_cluster_align;
- unsigned int m_cluster_align_inodes;
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
@@ -139,15 +134,12 @@ typedef struct xfs_mount {
uint m_alloc_mnr[2]; /* min alloc btree records */
uint m_bmap_dmxr[2]; /* max bmap btree records */
uint m_bmap_dmnr[2]; /* min bmap btree records */
- uint m_inobt_mxr[2]; /* max inobt btree records */
- uint m_inobt_mnr[2]; /* min inobt btree records */
uint m_rmap_mxr[2]; /* max rmap btree records */
uint m_rmap_mnr[2]; /* min rmap btree records */
uint m_refc_mxr[2]; /* max refc btree records */
uint m_refc_mnr[2]; /* min refc btree records */
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
- uint m_in_maxlevels; /* max inobt btree levels. */
uint m_rmap_maxlevels; /* max rmap btree levels */
uint m_refc_maxlevels; /* max refcount btree level */
xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */
@@ -159,20 +151,13 @@ typedef struct xfs_mount {
int m_fixedfsid[2]; /* unchanged for life of FS */
uint64_t m_flags; /* global mount flags */
bool m_finobt_nores; /* no per-AG finobt resv. */
- int m_ialloc_inos; /* inodes in inode allocation */
- int m_ialloc_blks; /* blocks in inode allocation */
- int m_ialloc_min_blks;/* min blocks in sparse inode
- * allocation */
- int m_inoalign_mask;/* mask sb_inoalignmt if used */
uint m_qflags; /* quota status flags */
struct xfs_trans_resv m_resv; /* precomputed res values */
- uint64_t m_maxicount; /* maximum inode count */
uint64_t m_resblks; /* total reserved blocks */
uint64_t m_resblks_avail;/* available reserved blocks */
uint64_t m_resblks_save; /* reserved blks @ remount,ro */
int m_dalign; /* stripe unit */
int m_swidth; /* stripe width */
- int m_sinoalign; /* stripe unit inode alignment */
uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
@@ -198,7 +183,6 @@ typedef struct xfs_mount {
struct workqueue_struct *m_unwritten_workqueue;
struct workqueue_struct *m_cil_workqueue;
struct workqueue_struct *m_reclaim_workqueue;
- struct workqueue_struct *m_log_workqueue;
struct workqueue_struct *m_eofblocks_workqueue;
struct workqueue_struct *m_sync_workqueue;
@@ -226,6 +210,8 @@ typedef struct xfs_mount {
#endif
} xfs_mount_t;
+#define M_IGEO(mp) (&(mp)->m_ino_geo)
+
/*
* Flags for m_flags.
*/
@@ -465,7 +451,7 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
bool reserved);
extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
-extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
+extern struct xfs_buf *xfs_getsb(xfs_mount_t *);
extern int xfs_readsb(xfs_mount_t *, int);
extern void xfs_freesb(xfs_mount_t *);
extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index c8ba98fae30a..b6701b4f59a9 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -146,6 +146,11 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0);
XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0);
XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0);
+
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64);
}
#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index bde2c9f56a46..0c954cad7449 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -2,23 +2,16 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
*/
-#include <linux/iomap.h>
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_log.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_error.h"
#include "xfs_iomap.h"
-#include "xfs_shared.h"
-#include "xfs_bit.h"
-#include "xfs_pnfs.h"
/*
* Ensure that we do not have any outstanding pNFS layouts that can be used by
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
new file mode 100644
index 000000000000..4bcc3e61056c
--- /dev/null
+++ b/fs/xfs/xfs_pwork.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+#include "xfs_sysctl.h"
+#include "xfs_pwork.h"
+#include <linux/nmi.h>
+
+/*
+ * Parallel Work Queue
+ * ===================
+ *
+ * Abstract away the details of running a large and "obviously" parallelizable
+ * task across multiple CPUs. Callers initialize the pwork control object with
+ * a desired level of parallelization and a work function. Next, they embed
+ * struct xfs_pwork in whatever structure they use to pass work context to a
+ * worker thread and queue that pwork. The work function will be passed the
+ * pwork item when it is run (from process context) and any returned error will
+ * be recorded in xfs_pwork_ctl.error. Work functions should check for errors
+ * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not
+ * stop workqueue item processing.
+ *
+ * This is the rough equivalent of the xfsprogs workqueue code, though we can't
+ * reuse that name here.
+ */
+
+/* Invoke our caller's function. */
+static void
+xfs_pwork_work(
+ struct work_struct *work)
+{
+ struct xfs_pwork *pwork;
+ struct xfs_pwork_ctl *pctl;
+ int error;
+
+ pwork = container_of(work, struct xfs_pwork, work);
+ pctl = pwork->pctl;
+ error = pctl->work_fn(pctl->mp, pwork);
+ if (error && !pctl->error)
+ pctl->error = error;
+ if (atomic_dec_and_test(&pctl->nr_work))
+ wake_up(&pctl->poll_wait);
+}
+
+/*
+ * Set up control data for parallel work. @work_fn is the function that will
+ * be called. @tag will be written into the kernel threads. @nr_threads is
+ * the level of parallelism desired, or 0 for no limit.
+ */
+int
+xfs_pwork_init(
+ struct xfs_mount *mp,
+ struct xfs_pwork_ctl *pctl,
+ xfs_pwork_work_fn work_fn,
+ const char *tag,
+ unsigned int nr_threads)
+{
+#ifdef DEBUG
+ if (xfs_globals.pwork_threads >= 0)
+ nr_threads = xfs_globals.pwork_threads;
+#endif
+ trace_xfs_pwork_init(mp, nr_threads, current->pid);
+
+ pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag,
+ current->pid);
+ if (!pctl->wq)
+ return -ENOMEM;
+ pctl->work_fn = work_fn;
+ pctl->error = 0;
+ pctl->mp = mp;
+ atomic_set(&pctl->nr_work, 0);
+ init_waitqueue_head(&pctl->poll_wait);
+
+ return 0;
+}
+
+/* Queue some parallel work. */
+void
+xfs_pwork_queue(
+ struct xfs_pwork_ctl *pctl,
+ struct xfs_pwork *pwork)
+{
+ INIT_WORK(&pwork->work, xfs_pwork_work);
+ pwork->pctl = pctl;
+ atomic_inc(&pctl->nr_work);
+ queue_work(pctl->wq, &pwork->work);
+}
+
+/* Wait for the work to finish and tear down the control structure. */
+int
+xfs_pwork_destroy(
+ struct xfs_pwork_ctl *pctl)
+{
+ destroy_workqueue(pctl->wq);
+ pctl->wq = NULL;
+ return pctl->error;
+}
+
+/*
+ * Wait for the work to finish by polling completion status and touch the soft
+ * lockup watchdog. This is for callers such as mount which hold locks.
+ */
+void
+xfs_pwork_poll(
+ struct xfs_pwork_ctl *pctl)
+{
+ while (wait_event_timeout(pctl->poll_wait,
+ atomic_read(&pctl->nr_work) == 0, HZ) == 0)
+ touch_softlockup_watchdog();
+}
+
+/*
+ * Return the amount of parallelism that the data device can handle, or 0 for
+ * no limit.
+ */
+unsigned int
+xfs_pwork_guess_datadev_parallelism(
+ struct xfs_mount *mp)
+{
+ struct xfs_buftarg *btp = mp->m_ddev_targp;
+
+ /*
+ * For now we'll go with the most conservative setting possible,
+ * which is two threads for an SSD and 1 thread everywhere else.
+ */
+ return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1;
+}
diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h
new file mode 100644
index 000000000000..8133124cf3bb
--- /dev/null
+++ b/fs/xfs/xfs_pwork.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_PWORK_H__
+#define __XFS_PWORK_H__
+
+struct xfs_pwork;
+struct xfs_mount;
+
+typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork);
+
+/*
+ * Parallel work coordination structure.
+ */
+struct xfs_pwork_ctl {
+ struct workqueue_struct *wq;
+ struct xfs_mount *mp;
+ xfs_pwork_work_fn work_fn;
+ struct wait_queue_head poll_wait;
+ atomic_t nr_work;
+ int error;
+};
+
+/*
+ * Embed this parallel work control item inside your own work structure,
+ * then queue work with it.
+ */
+struct xfs_pwork {
+ struct work_struct work;
+ struct xfs_pwork_ctl *pctl;
+};
+
+#define XFS_PWORK_SINGLE_THREADED { .pctl = NULL }
+
+/* Have we been told to abort? */
+static inline bool
+xfs_pwork_ctl_want_abort(
+ struct xfs_pwork_ctl *pctl)
+{
+ return pctl && pctl->error;
+}
+
+/* Have we been told to abort? */
+static inline bool
+xfs_pwork_want_abort(
+ struct xfs_pwork *pwork)
+{
+ return xfs_pwork_ctl_want_abort(pwork->pctl);
+}
+
+int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
+ xfs_pwork_work_fn work_fn, const char *tag,
+ unsigned int nr_threads);
+void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
+int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
+void xfs_pwork_poll(struct xfs_pwork_ctl *pctl);
+unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
+
+#endif /* __XFS_PWORK_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index aa6b6db3db0e..5e7a37f0cf84 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -13,19 +13,15 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
+#include "xfs_iwalk.h"
#include "xfs_quota.h"
-#include "xfs_error.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
#include "xfs_bmap_util.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_cksum.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -1118,17 +1114,15 @@ xfs_qm_quotacheck_dqadjust(
/* ARGSUSED */
STATIC int
xfs_qm_dqusage_adjust(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* not used */
- int ubsize, /* not used */
- int *ubused, /* not used */
- int *res) /* result code value */
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ void *data)
{
- xfs_inode_t *ip;
- xfs_qcnt_t nblks;
- xfs_filblks_t rtblks = 0; /* total rt blks */
- int error;
+ struct xfs_inode *ip;
+ xfs_qcnt_t nblks;
+ xfs_filblks_t rtblks = 0; /* total rt blks */
+ int error;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -1136,20 +1130,18 @@ xfs_qm_dqusage_adjust(
* rootino must have its resources accounted for, not so with the quota
* inodes.
*/
- if (xfs_is_quota_inode(&mp->m_sb, ino)) {
- *res = BULKSTAT_RV_NOTHING;
- return -EINVAL;
- }
+ if (xfs_is_quota_inode(&mp->m_sb, ino))
+ return 0;
/*
* We don't _need_ to take the ilock EXCL here because quotacheck runs
* at mount time and therefore nobody will be racing chown/chproj.
*/
- error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip);
- if (error) {
- *res = BULKSTAT_RV_NOTHING;
+ error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
+ if (error == -EINVAL || error == -ENOENT)
+ return 0;
+ if (error)
return error;
- }
ASSERT(ip->i_delayed_blks == 0);
@@ -1157,7 +1149,7 @@ xfs_qm_dqusage_adjust(
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
- error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
if (error)
goto error0;
}
@@ -1200,13 +1192,8 @@ xfs_qm_dqusage_adjust(
goto error0;
}
- xfs_irele(ip);
- *res = BULKSTAT_RV_DIDONE;
- return 0;
-
error0:
xfs_irele(ip);
- *res = BULKSTAT_RV_GIVEUP;
return error;
}
@@ -1270,18 +1257,13 @@ STATIC int
xfs_qm_quotacheck(
xfs_mount_t *mp)
{
- int done, count, error, error2;
- xfs_ino_t lastino;
- size_t structsz;
+ int error, error2;
uint flags;
LIST_HEAD (buffer_list);
struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip;
struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip;
struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip;
- count = INT_MAX;
- structsz = 1;
- lastino = 0;
flags = 0;
ASSERT(uip || gip || pip);
@@ -1318,18 +1300,10 @@ xfs_qm_quotacheck(
flags |= XFS_PQUOTA_CHKD;
}
- do {
- /*
- * Iterate thru all the inodes in the file system,
- * adjusting the corresponding dquot counters in core.
- */
- error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_dqusage_adjust,
- structsz, NULL, &done);
- if (error)
- break;
-
- } while (!done);
+ error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
+ NULL);
+ if (error)
+ goto error_return;
/*
* We've made all the changes that we need to make incore. Flush them
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 3091e4bc04ef..5d72e88598b4 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -5,13 +5,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index b3190890f096..da7ad0383037 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -4,7 +4,6 @@
* All Rights Reserved.
*/
-#include <linux/capability.h>
#include "xfs.h"
#include "xfs_fs.h"
@@ -12,17 +11,13 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_defer.h"
STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index a7c0c657dfaf..cd6c7210a373 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -4,6 +4,7 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -11,10 +12,8 @@
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_qm.h"
-#include <linux/quota.h>
static void
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index fce38b56b962..d8288aa0670a 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -14,7 +14,6 @@
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"
@@ -95,15 +94,6 @@ xfs_cui_item_format(
}
/*
- * Pinning has no meaning for an cui item, so just return.
- */
-STATIC void
-xfs_cui_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place an CUI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the CUI transaction has been successfully committed to make it
@@ -122,71 +112,22 @@ xfs_cui_item_unpin(
}
/*
- * CUI items have no locking or pushing. However, since CUIs are pulled from
- * the AIL when their corresponding CUDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the CUI out of
- * the AIL.
- */
-STATIC uint
-xfs_cui_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The CUI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, an CUD isn't going to be
* constructed and thus we free the CUI here directly.
*/
STATIC void
-xfs_cui_item_unlock(
+xfs_cui_item_release(
struct xfs_log_item *lip)
{
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- xfs_cui_release(CUI_ITEM(lip));
+ xfs_cui_release(CUI_ITEM(lip));
}
-/*
- * The CUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_cui_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
-/*
- * The CUI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_cui_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all cui log items.
- */
static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_size = xfs_cui_item_size,
.iop_format = xfs_cui_item_format,
- .iop_pin = xfs_cui_item_pin,
.iop_unpin = xfs_cui_item_unpin,
- .iop_unlock = xfs_cui_item_unlock,
- .iop_committed = xfs_cui_item_committed,
- .iop_push = xfs_cui_item_push,
- .iop_committing = xfs_cui_item_committing,
+ .iop_release = xfs_cui_item_release,
};
/*
@@ -254,126 +195,250 @@ xfs_cud_item_format(
}
/*
- * Pinning has no meaning for an cud item, so just return.
+ * The CUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the CUI and free the
+ * CUD.
*/
STATIC void
-xfs_cud_item_pin(
+xfs_cud_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+
+ xfs_cui_release(cudp->cud_cuip);
+ kmem_zone_free(xfs_cud_zone, cudp);
}
-/*
- * Since pinning has no meaning for an cud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_cud_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static const struct xfs_item_ops xfs_cud_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_cud_item_size,
+ .iop_format = xfs_cud_item_format,
+ .iop_release = xfs_cud_item_release,
+};
+
+static struct xfs_cud_log_item *
+xfs_trans_get_cud(
+ struct xfs_trans *tp,
+ struct xfs_cui_log_item *cuip)
{
+ struct xfs_cud_log_item *cudp;
+
+ cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
+ xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
+ &xfs_cud_item_ops);
+ cudp->cud_cuip = cuip;
+ cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
+
+ xfs_trans_add_item(tp, &cudp->cud_item);
+ return cudp;
}
/*
- * There isn't much you can do to push on an cud item. It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Finish an refcount update and log it to the CUD. Note that the
+ * transaction is marked dirty regardless of whether the refcount
+ * update succeeds or fails to support the CUI/CUD lifecycle rules.
*/
-STATIC uint
-xfs_cud_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+static int
+xfs_trans_log_finish_refcount_update(
+ struct xfs_trans *tp,
+ struct xfs_cud_log_item *cudp,
+ enum xfs_refcount_intent_type type,
+ xfs_fsblock_t startblock,
+ xfs_extlen_t blockcount,
+ xfs_fsblock_t *new_fsb,
+ xfs_extlen_t *new_len,
+ struct xfs_btree_cur **pcur)
{
- return XFS_ITEM_PINNED;
+ int error;
+
+ error = xfs_refcount_finish_one(tp, type, startblock,
+ blockcount, new_fsb, new_len, pcur);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the CUI and frees the CUD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
+
+ return error;
}
-/*
- * The CUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the CUI and free the
- * CUD.
- */
-STATIC void
-xfs_cud_item_unlock(
- struct xfs_log_item *lip)
+/* Sort refcount intents by AG. */
+static int
+xfs_refcount_update_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
{
- struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+ struct xfs_mount *mp = priv;
+ struct xfs_refcount_intent *ra;
+ struct xfs_refcount_intent *rb;
+
+ ra = container_of(a, struct xfs_refcount_intent, ri_list);
+ rb = container_of(b, struct xfs_refcount_intent, ri_list);
+ return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
+ XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
+}
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
- xfs_cui_release(cudp->cud_cuip);
- kmem_zone_free(xfs_cud_zone, cudp);
+/* Get an CUI. */
+STATIC void *
+xfs_refcount_update_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
+{
+ struct xfs_cui_log_item *cuip;
+
+ ASSERT(tp != NULL);
+ ASSERT(count > 0);
+
+ cuip = xfs_cui_init(tp->t_mountp, count);
+ ASSERT(cuip != NULL);
+
+ /*
+ * Get a log_item_desc to point at the new item.
+ */
+ xfs_trans_add_item(tp, &cuip->cui_item);
+ return cuip;
+}
+
+/* Set the phys extent flags for this reverse mapping. */
+static void
+xfs_trans_set_refcount_flags(
+ struct xfs_phys_extent *refc,
+ enum xfs_refcount_intent_type type)
+{
+ refc->pe_flags = 0;
+ switch (type) {
+ case XFS_REFCOUNT_INCREASE:
+ case XFS_REFCOUNT_DECREASE:
+ case XFS_REFCOUNT_ALLOC_COW:
+ case XFS_REFCOUNT_FREE_COW:
+ refc->pe_flags |= type;
+ break;
+ default:
+ ASSERT(0);
}
}
-/*
- * When the cud item is committed to disk, all we need to do is delete our
- * reference to our partner cui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_cud_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Log refcount updates in the intent item. */
+STATIC void
+xfs_refcount_update_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
{
- struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+ struct xfs_cui_log_item *cuip = intent;
+ struct xfs_refcount_intent *refc;
+ uint next_extent;
+ struct xfs_phys_extent *ext;
+
+ refc = container_of(item, struct xfs_refcount_intent, ri_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
/*
- * Drop the CUI reference regardless of whether the CUD has been
- * aborted. Once the CUD transaction is constructed, it is the sole
- * responsibility of the CUD to release the CUI (even if the CUI is
- * aborted due to log I/O error).
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
*/
- xfs_cui_release(cudp->cud_cuip);
- kmem_zone_free(xfs_cud_zone, cudp);
+ next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
+ ASSERT(next_extent < cuip->cui_format.cui_nextents);
+ ext = &cuip->cui_format.cui_extents[next_extent];
+ ext->pe_startblock = refc->ri_startblock;
+ ext->pe_len = refc->ri_blockcount;
+ xfs_trans_set_refcount_flags(ext, refc->ri_type);
+}
- return (xfs_lsn_t)-1;
+/* Get an CUD so we can process all the deferred refcount updates. */
+STATIC void *
+xfs_refcount_update_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_cud(tp, intent);
}
-/*
- * The CUD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_cud_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Process a deferred refcount update. */
+STATIC int
+xfs_refcount_update_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
{
+ struct xfs_refcount_intent *refc;
+ xfs_fsblock_t new_fsb;
+ xfs_extlen_t new_aglen;
+ int error;
+
+ refc = container_of(item, struct xfs_refcount_intent, ri_list);
+ error = xfs_trans_log_finish_refcount_update(tp, done_item,
+ refc->ri_type,
+ refc->ri_startblock,
+ refc->ri_blockcount,
+ &new_fsb, &new_aglen,
+ (struct xfs_btree_cur **)state);
+ /* Did we run out of reservation? Requeue what we didn't finish. */
+ if (!error && new_aglen > 0) {
+ ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
+ refc->ri_type == XFS_REFCOUNT_DECREASE);
+ refc->ri_startblock = new_fsb;
+ refc->ri_blockcount = new_aglen;
+ return -EAGAIN;
+ }
+ kmem_free(refc);
+ return error;
}
-/*
- * This is the ops vector shared by all cud log items.
- */
-static const struct xfs_item_ops xfs_cud_item_ops = {
- .iop_size = xfs_cud_item_size,
- .iop_format = xfs_cud_item_format,
- .iop_pin = xfs_cud_item_pin,
- .iop_unpin = xfs_cud_item_unpin,
- .iop_unlock = xfs_cud_item_unlock,
- .iop_committed = xfs_cud_item_committed,
- .iop_push = xfs_cud_item_push,
- .iop_committing = xfs_cud_item_committing,
-};
+/* Clean up after processing deferred refcounts. */
+STATIC void
+xfs_refcount_update_finish_cleanup(
+ struct xfs_trans *tp,
+ void *state,
+ int error)
+{
+ struct xfs_btree_cur *rcur = state;
-/*
- * Allocate and initialize an cud item with the given number of extents.
- */
-struct xfs_cud_log_item *
-xfs_cud_init(
- struct xfs_mount *mp,
- struct xfs_cui_log_item *cuip)
+ xfs_refcount_finish_one_cleanup(tp, rcur, error);
+}
+/* Abort all pending CUIs. */
+STATIC void
+xfs_refcount_update_abort_intent(
+ void *intent)
{
- struct xfs_cud_log_item *cudp;
+ xfs_cui_release(intent);
+}
- cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
- xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
- cudp->cud_cuip = cuip;
- cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
+/* Cancel a deferred refcount update. */
+STATIC void
+xfs_refcount_update_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_refcount_intent *refc;
- return cudp;
+ refc = container_of(item, struct xfs_refcount_intent, ri_list);
+ kmem_free(refc);
}
+const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
+ .max_items = XFS_CUI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_refcount_update_diff_items,
+ .create_intent = xfs_refcount_update_create_intent,
+ .abort_intent = xfs_refcount_update_abort_intent,
+ .log_item = xfs_refcount_update_log_item,
+ .create_done = xfs_refcount_update_create_done,
+ .finish_item = xfs_refcount_update_finish_item,
+ .finish_cleanup = xfs_refcount_update_finish_cleanup,
+ .cancel_item = xfs_refcount_update_cancel_item,
+};
+
/*
* Process a refcount update intent item that was recovered from the log.
* We need to update the refcountbt.
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index 3896dcc2368f..e47530f30489 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_cui_zone;
extern struct kmem_zone *xfs_cud_zone;
struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
-struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
- struct xfs_cui_log_item *);
void xfs_cui_item_free(struct xfs_cui_log_item *);
void xfs_cui_release(struct xfs_cui_log_item *);
int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 680ae7662a78..c4ec7afd1170 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -11,21 +11,12 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_error.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_ioctl.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
#include "xfs_icache.h"
-#include "xfs_pnfs.h"
#include "xfs_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_refcount.h"
@@ -33,11 +24,9 @@
#include "xfs_trans_space.h"
#include "xfs_bit.h"
#include "xfs_alloc.h"
-#include "xfs_quota_defs.h"
#include "xfs_quota.h"
#include "xfs_reflink.h"
#include "xfs_iomap.h"
-#include "xfs_rmap_btree.h"
#include "xfs_sb.h"
#include "xfs_ag_resv.h"
@@ -572,7 +561,7 @@ xfs_reflink_cancel_cow_range(
/* Start a rolling transaction to remove the mappings */
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
- 0, 0, XFS_TRANS_NOFS, &tp);
+ 0, 0, 0, &tp);
if (error)
goto out;
@@ -631,7 +620,7 @@ xfs_reflink_end_cow_extent(
resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
- XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+ XFS_TRANS_RESERVE, &tp);
if (error)
return error;
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 127dc9c32a54..77ed557b6127 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -14,7 +14,6 @@
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_rmap_item.h"
#include "xfs_log.h"
#include "xfs_rmap.h"
@@ -94,15 +93,6 @@ xfs_rui_item_format(
}
/*
- * Pinning has no meaning for an rui item, so just return.
- */
-STATIC void
-xfs_rui_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place an RUI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the RUI transaction has been successfully committed to make it
@@ -121,71 +111,22 @@ xfs_rui_item_unpin(
}
/*
- * RUI items have no locking or pushing. However, since RUIs are pulled from
- * the AIL when their corresponding RUDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the RUI out of
- * the AIL.
- */
-STATIC uint
-xfs_rui_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The RUI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, an RUD isn't going to be
* constructed and thus we free the RUI here directly.
*/
STATIC void
-xfs_rui_item_unlock(
+xfs_rui_item_release(
struct xfs_log_item *lip)
{
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- xfs_rui_release(RUI_ITEM(lip));
+ xfs_rui_release(RUI_ITEM(lip));
}
-/*
- * The RUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_rui_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
-/*
- * The RUI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_rui_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all rui log items.
- */
static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_size = xfs_rui_item_size,
.iop_format = xfs_rui_item_format,
- .iop_pin = xfs_rui_item_pin,
.iop_unpin = xfs_rui_item_unpin,
- .iop_unlock = xfs_rui_item_unlock,
- .iop_committed = xfs_rui_item_committed,
- .iop_push = xfs_rui_item_push,
- .iop_committing = xfs_rui_item_committing,
+ .iop_release = xfs_rui_item_release,
};
/*
@@ -275,126 +216,271 @@ xfs_rud_item_format(
}
/*
- * Pinning has no meaning for an rud item, so just return.
+ * The RUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the RUI and free the
+ * RUD.
*/
STATIC void
-xfs_rud_item_pin(
+xfs_rud_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+
+ xfs_rui_release(rudp->rud_ruip);
+ kmem_zone_free(xfs_rud_zone, rudp);
}
-/*
- * Since pinning has no meaning for an rud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_rud_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static const struct xfs_item_ops xfs_rud_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_rud_item_size,
+ .iop_format = xfs_rud_item_format,
+ .iop_release = xfs_rud_item_release,
+};
+
+static struct xfs_rud_log_item *
+xfs_trans_get_rud(
+ struct xfs_trans *tp,
+ struct xfs_rui_log_item *ruip)
{
+ struct xfs_rud_log_item *rudp;
+
+ rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
+ xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
+ &xfs_rud_item_ops);
+ rudp->rud_ruip = ruip;
+ rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+
+ xfs_trans_add_item(tp, &rudp->rud_item);
+ return rudp;
}
-/*
- * There isn't much you can do to push on an rud item. It is simply stuck
- * waiting for the log to be flushed to disk.
- */
-STATIC uint
-xfs_rud_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+/* Set the map extent flags for this reverse mapping. */
+static void
+xfs_trans_set_rmap_flags(
+ struct xfs_map_extent *rmap,
+ enum xfs_rmap_intent_type type,
+ int whichfork,
+ xfs_exntst_t state)
{
- return XFS_ITEM_PINNED;
+ rmap->me_flags = 0;
+ if (state == XFS_EXT_UNWRITTEN)
+ rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
+ if (whichfork == XFS_ATTR_FORK)
+ rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
+ switch (type) {
+ case XFS_RMAP_MAP:
+ rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
+ break;
+ case XFS_RMAP_MAP_SHARED:
+ rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
+ break;
+ case XFS_RMAP_UNMAP:
+ rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
+ break;
+ case XFS_RMAP_UNMAP_SHARED:
+ rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
+ break;
+ case XFS_RMAP_CONVERT:
+ rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
+ break;
+ case XFS_RMAP_CONVERT_SHARED:
+ rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
+ break;
+ case XFS_RMAP_ALLOC:
+ rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
+ break;
+ case XFS_RMAP_FREE:
+ rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
+ break;
+ default:
+ ASSERT(0);
+ }
}
/*
- * The RUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the RUI and free the
- * RUD.
+ * Finish an rmap update and log it to the RUD. Note that the transaction is
+ * marked dirty regardless of whether the rmap update succeeds or fails to
+ * support the RUI/RUD lifecycle rules.
*/
-STATIC void
-xfs_rud_item_unlock(
- struct xfs_log_item *lip)
+static int
+xfs_trans_log_finish_rmap_update(
+ struct xfs_trans *tp,
+ struct xfs_rud_log_item *rudp,
+ enum xfs_rmap_intent_type type,
+ uint64_t owner,
+ int whichfork,
+ xfs_fileoff_t startoff,
+ xfs_fsblock_t startblock,
+ xfs_filblks_t blockcount,
+ xfs_exntst_t state,
+ struct xfs_btree_cur **pcur)
{
- struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+ int error;
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
- xfs_rui_release(rudp->rud_ruip);
- kmem_zone_free(xfs_rud_zone, rudp);
- }
+ error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
+ startblock, blockcount, state, pcur);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the RUI and frees the RUD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
+
+ return error;
}
-/*
- * When the rud item is committed to disk, all we need to do is delete our
- * reference to our partner rui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_rud_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Sort rmap intents by AG. */
+static int
+xfs_rmap_update_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
{
- struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+ struct xfs_mount *mp = priv;
+ struct xfs_rmap_intent *ra;
+ struct xfs_rmap_intent *rb;
+
+ ra = container_of(a, struct xfs_rmap_intent, ri_list);
+ rb = container_of(b, struct xfs_rmap_intent, ri_list);
+ return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
+ XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
+}
+
+/* Get an RUI. */
+STATIC void *
+xfs_rmap_update_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
+{
+ struct xfs_rui_log_item *ruip;
+
+ ASSERT(tp != NULL);
+ ASSERT(count > 0);
+
+ ruip = xfs_rui_init(tp->t_mountp, count);
+ ASSERT(ruip != NULL);
/*
- * Drop the RUI reference regardless of whether the RUD has been
- * aborted. Once the RUD transaction is constructed, it is the sole
- * responsibility of the RUD to release the RUI (even if the RUI is
- * aborted due to log I/O error).
+ * Get a log_item_desc to point at the new item.
*/
- xfs_rui_release(rudp->rud_ruip);
- kmem_zone_free(xfs_rud_zone, rudp);
-
- return (xfs_lsn_t)-1;
+ xfs_trans_add_item(tp, &ruip->rui_item);
+ return ruip;
}
-/*
- * The RUD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Log rmap updates in the intent item. */
STATIC void
-xfs_rud_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+xfs_rmap_update_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
{
+ struct xfs_rui_log_item *ruip = intent;
+ struct xfs_rmap_intent *rmap;
+ uint next_extent;
+ struct xfs_map_extent *map;
+
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
+
+ /*
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
+ */
+ next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
+ ASSERT(next_extent < ruip->rui_format.rui_nextents);
+ map = &ruip->rui_format.rui_extents[next_extent];
+ map->me_owner = rmap->ri_owner;
+ map->me_startblock = rmap->ri_bmap.br_startblock;
+ map->me_startoff = rmap->ri_bmap.br_startoff;
+ map->me_len = rmap->ri_bmap.br_blockcount;
+ xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
+ rmap->ri_bmap.br_state);
}
-/*
- * This is the ops vector shared by all rud log items.
- */
-static const struct xfs_item_ops xfs_rud_item_ops = {
- .iop_size = xfs_rud_item_size,
- .iop_format = xfs_rud_item_format,
- .iop_pin = xfs_rud_item_pin,
- .iop_unpin = xfs_rud_item_unpin,
- .iop_unlock = xfs_rud_item_unlock,
- .iop_committed = xfs_rud_item_committed,
- .iop_push = xfs_rud_item_push,
- .iop_committing = xfs_rud_item_committing,
-};
+/* Get an RUD so we can process all the deferred rmap updates. */
+STATIC void *
+xfs_rmap_update_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_rud(tp, intent);
+}
-/*
- * Allocate and initialize an rud item with the given number of extents.
- */
-struct xfs_rud_log_item *
-xfs_rud_init(
- struct xfs_mount *mp,
- struct xfs_rui_log_item *ruip)
+/* Process a deferred rmap update. */
+STATIC int
+xfs_rmap_update_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
+{
+ struct xfs_rmap_intent *rmap;
+ int error;
+
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+ error = xfs_trans_log_finish_rmap_update(tp, done_item,
+ rmap->ri_type,
+ rmap->ri_owner, rmap->ri_whichfork,
+ rmap->ri_bmap.br_startoff,
+ rmap->ri_bmap.br_startblock,
+ rmap->ri_bmap.br_blockcount,
+ rmap->ri_bmap.br_state,
+ (struct xfs_btree_cur **)state);
+ kmem_free(rmap);
+ return error;
+}
+
+/* Clean up after processing deferred rmaps. */
+STATIC void
+xfs_rmap_update_finish_cleanup(
+ struct xfs_trans *tp,
+ void *state,
+ int error)
+{
+ struct xfs_btree_cur *rcur = state;
+
+ xfs_rmap_finish_one_cleanup(tp, rcur, error);
+}
+/* Abort all pending RUIs. */
+STATIC void
+xfs_rmap_update_abort_intent(
+ void *intent)
{
- struct xfs_rud_log_item *rudp;
+ xfs_rui_release(intent);
+}
- rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
- xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
- rudp->rud_ruip = ruip;
- rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+/* Cancel a deferred rmap update. */
+STATIC void
+xfs_rmap_update_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_rmap_intent *rmap;
- return rudp;
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+ kmem_free(rmap);
}
+const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
+ .max_items = XFS_RUI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_rmap_update_diff_items,
+ .create_intent = xfs_rmap_update_create_intent,
+ .abort_intent = xfs_rmap_update_abort_intent,
+ .log_item = xfs_rmap_update_log_item,
+ .create_done = xfs_rmap_update_create_done,
+ .finish_item = xfs_rmap_update_finish_item,
+ .finish_cleanup = xfs_rmap_update_finish_cleanup,
+ .cancel_item = xfs_rmap_update_cancel_item,
+};
+
/*
* Process an rmap update intent item that was recovered from the log.
* We need to update the rmapbt.
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 7e482baa27f5..8708e4a5aa5c 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_rui_zone;
extern struct kmem_zone *xfs_rud_zone;
struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
-struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
- struct xfs_rui_log_item *);
int xfs_rui_copy_format(struct xfs_log_iovec *buf,
struct xfs_rui_log_format *dst_rui_fmt);
void xfs_rui_item_free(struct xfs_rui_log_item *);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ac0fcdad0c4e..5fa4db3c3e32 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -11,17 +11,11 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
#include "xfs_icache.h"
#include "xfs_rtalloc.h"
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index cc509743facd..113883c4f202 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -4,7 +4,6 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include <linux/proc_fs.h>
struct xstats xfsstats;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index a14d11d78bd8..f9450235533c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -11,18 +11,15 @@
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
@@ -38,18 +35,8 @@
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
-#include "xfs_defer.h"
-#include <linux/namei.h>
-#include <linux/dax.h>
-#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/magic.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
#include <linux/parser.h>
static const struct super_operations xfs_super_operations;
@@ -582,7 +569,7 @@ xfs_set_inode_alloc(
* Calculate how much should be reserved for inodes to meet
* the max inode percentage. Used only for inode32.
*/
- if (mp->m_maxicount) {
+ if (M_IGEO(mp)->maxicount) {
uint64_t icount;
icount = sbp->sb_dblocks * sbp->sb_imax_pct;
@@ -840,16 +827,10 @@ xfs_init_mount_workqueues(
if (!mp->m_reclaim_workqueue)
goto out_destroy_cil;
- mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
- mp->m_fsname);
- if (!mp->m_log_workqueue)
- goto out_destroy_reclaim;
-
mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
if (!mp->m_eofblocks_workqueue)
- goto out_destroy_log;
+ goto out_destroy_reclaim;
mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
mp->m_fsname);
@@ -860,8 +841,6 @@ xfs_init_mount_workqueues(
out_destroy_eofb:
destroy_workqueue(mp->m_eofblocks_workqueue);
-out_destroy_log:
- destroy_workqueue(mp->m_log_workqueue);
out_destroy_reclaim:
destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_cil:
@@ -880,7 +859,6 @@ xfs_destroy_mount_workqueues(
{
destroy_workqueue(mp->m_sync_workqueue);
destroy_workqueue(mp->m_eofblocks_workqueue);
- destroy_workqueue(mp->m_log_workqueue);
destroy_workqueue(mp->m_reclaim_workqueue);
destroy_workqueue(mp->m_cil_workqueue);
destroy_workqueue(mp->m_unwritten_workqueue);
@@ -1131,10 +1109,10 @@ xfs_fs_statfs(
fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
- if (mp->m_maxicount)
+ if (M_IGEO(mp)->maxicount)
statp->f_files = min_t(typeof(statp->f_files),
statp->f_files,
- mp->m_maxicount);
+ M_IGEO(mp)->maxicount);
/* If sb_icount overshot maxicount, report actual allocation */
statp->f_files = max_t(typeof(statp->f_files),
@@ -1685,6 +1663,8 @@ xfs_fs_fill_super(
sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
sb->s_max_links = XFS_MAXLINK;
sb->s_time_gran = 1;
+ sb->s_iflags |= SB_I_CGROUPWB;
+
set_posix_acl_flag(sb);
/* version 5 superblocks support inode version counters. */
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 21cb49a43d7c..763e43d22dee 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -38,6 +38,18 @@ extern void xfs_qm_exit(void);
# define XFS_SCRUB_STRING
#endif
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+# define XFS_REPAIR_STRING "repair, "
+#else
+# define XFS_REPAIR_STRING
+#endif
+
+#ifdef CONFIG_XFS_WARN
+# define XFS_WARN_STRING "verbose warnings, "
+#else
+# define XFS_WARN_STRING
+#endif
+
#ifdef DEBUG
# define XFS_DBG_STRING "debug"
#else
@@ -49,6 +61,8 @@ extern void xfs_qm_exit(void);
XFS_SECURITY_STRING \
XFS_REALTIME_STRING \
XFS_SCRUB_STRING \
+ XFS_REPAIR_STRING \
+ XFS_WARN_STRING \
XFS_DBG_STRING /* DBG must be last */
struct xfs_inode;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index b2c1177c717f..ed66fd2de327 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -12,23 +12,14 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_defer.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
-#include "xfs_bmap_util.h"
-#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
-#include "xfs_symlink.h"
#include "xfs_trans.h"
-#include "xfs_log.h"
/* ----- Kernel only functions below ----- */
int
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 0cc034dfb786..31b3bdbd2eba 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -4,10 +4,7 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
#include "xfs_error.h"
-#include "xfs_stats.h"
static struct ctl_table_header *xfs_table_header;
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index ad7f9be13087..8abf4640f1d5 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -82,6 +82,9 @@ enum {
extern xfs_param_t xfs_params;
struct xfs_globals {
+#ifdef DEBUG
+ int pwork_threads; /* parallel workqueue threads */
+#endif
int log_recovery_delay; /* log recovery delay (secs) */
int mount_delay; /* mount setup delay (secs) */
bool bug_on_assert; /* BUG() the kernel on assert failure */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index cabda13f3c64..ddd0bf7a4740 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -10,9 +10,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sysfs.h"
-#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_stats.h"
#include "xfs_mount.h"
struct xfs_sysfs_attr {
@@ -206,11 +204,51 @@ always_cow_show(
}
XFS_SYSFS_ATTR_RW(always_cow);
+#ifdef DEBUG
+/*
+ * Override how many threads the parallel work queue is allowed to create.
+ * This has to be a debug-only global (instead of an errortag) because one of
+ * the main users of parallel workqueues is mount time quotacheck.
+ */
+STATIC ssize_t
+pwork_threads_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val < -1 || val > num_possible_cpus())
+ return -EINVAL;
+
+ xfs_globals.pwork_threads = val;
+
+ return count;
+}
+
+STATIC ssize_t
+pwork_threads_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads);
+}
+XFS_SYSFS_ATTR_RW(pwork_threads);
+#endif /* DEBUG */
+
static struct attribute *xfs_dbg_attrs[] = {
ATTR_LIST(bug_on_assert),
ATTR_LIST(log_recovery_delay),
ATTR_LIST(mount_delay),
ATTR_LIST(always_cow),
+#ifdef DEBUG
+ ATTR_LIST(pwork_threads),
+#endif
NULL,
};
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index cb6489c22cad..bc85b89f88ca 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -15,24 +15,16 @@
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_da_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
#include "xfs_trans.h"
-#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_buf_item.h"
#include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap_btree.h"
#include "xfs_filestream.h"
#include "xfs_fsmap.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2464ea351f83..8094b1920eef 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -475,7 +475,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
@@ -3360,6 +3360,7 @@ DEFINE_TRANS_EVENT(xfs_trans_dup);
DEFINE_TRANS_EVENT(xfs_trans_free);
DEFINE_TRANS_EVENT(xfs_trans_roll);
DEFINE_TRANS_EVENT(xfs_trans_add_item);
+DEFINE_TRANS_EVENT(xfs_trans_commit_items);
DEFINE_TRANS_EVENT(xfs_trans_free_items);
TRACE_EVENT(xfs_iunlink_update_bucket,
@@ -3516,6 +3517,64 @@ DEFINE_EVENT(xfs_inode_corrupt_class, name, \
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
+TRACE_EVENT(xfs_iwalk_ag,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agino_t startino),
+ TP_ARGS(mp, agno, startino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, startino)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->startino = startino;
+ ),
+ TP_printk("dev %d:%d agno %d startino %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->startino)
+)
+
+TRACE_EVENT(xfs_iwalk_ag_rec,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irec),
+ TP_ARGS(mp, agno, irec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, startino)
+ __field(uint64_t, freemask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->startino = irec->ir_startino;
+ __entry->freemask = irec->ir_free;
+ ),
+ TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->startino, __entry->freemask)
+)
+
+TRACE_EVENT(xfs_pwork_init,
+ TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid),
+ TP_ARGS(mp, nr_threads, pid),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, nr_threads)
+ __field(pid_t, pid)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->nr_threads = nr_threads;
+ __entry->pid = pid;
+ ),
+ TP_printk("dev %d:%d nr_threads %u pid %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->nr_threads, __entry->pid)
+)
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 912b42f5fe4a..d42a68d8313b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -11,7 +11,6 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
#include "xfs_extent_busy.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
@@ -264,9 +263,7 @@ xfs_trans_alloc(
* GFP_NOFS allocation context so that we avoid lockdep false positives
* by doing GFP_KERNEL allocations inside sb_start_intwrite().
*/
- tp = kmem_zone_zalloc(xfs_trans_zone,
- (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
-
+ tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super);
@@ -452,7 +449,7 @@ xfs_trans_apply_sb_deltas(
xfs_buf_t *bp;
int whole = 0;
- bp = xfs_trans_getsb(tp, tp->t_mountp, 0);
+ bp = xfs_trans_getsb(tp, tp->t_mountp);
sbp = XFS_BUF_TO_SBP(bp);
/*
@@ -767,10 +764,9 @@ xfs_trans_del_item(
}
/* Detach and unlock all of the items in a transaction */
-void
+static void
xfs_trans_free_items(
struct xfs_trans *tp,
- xfs_lsn_t commit_lsn,
bool abort)
{
struct xfs_log_item *lip, *next;
@@ -779,11 +775,10 @@ xfs_trans_free_items(
list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
xfs_trans_del_item(lip);
- if (commit_lsn != NULLCOMMITLSN)
- lip->li_ops->iop_committing(lip, commit_lsn);
if (abort)
set_bit(XFS_LI_ABORTED, &lip->li_flags);
- lip->li_ops->iop_unlock(lip);
+ if (lip->li_ops->iop_release)
+ lip->li_ops->iop_release(lip);
}
}
@@ -804,7 +799,8 @@ xfs_log_item_batch_insert(
for (i = 0; i < nr_items; i++) {
struct xfs_log_item *lip = log_items[i];
- lip->li_ops->iop_unpin(lip, 0);
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 0);
}
}
@@ -815,7 +811,7 @@ xfs_log_item_batch_insert(
*
* If we are called with the aborted flag set, it is because a log write during
* a CIL checkpoint commit has failed. In this case, all the items in the
- * checkpoint have already gone through iop_commited and iop_unlock, which
+ * checkpoint have already gone through iop_committed and iop_committing, which
* means that checkpoint commit abort handling is treated exactly the same
* as an iclog write error even though we haven't started any IO yet. Hence in
* this case all we need to do is iop_committed processing, followed by an
@@ -833,7 +829,7 @@ xfs_trans_committed_bulk(
struct xfs_ail *ailp,
struct xfs_log_vec *log_vector,
xfs_lsn_t commit_lsn,
- int aborted)
+ bool aborted)
{
#define LOG_ITEM_BATCH_SIZE 32
struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
@@ -852,7 +848,16 @@ xfs_trans_committed_bulk(
if (aborted)
set_bit(XFS_LI_ABORTED, &lip->li_flags);
- item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+
+ if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
+ lip->li_ops->iop_release(lip);
+ continue;
+ }
+
+ if (lip->li_ops->iop_committed)
+ item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+ else
+ item_lsn = commit_lsn;
/* item_lsn of -1 means the item needs no further processing */
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
@@ -864,7 +869,8 @@ xfs_trans_committed_bulk(
*/
if (aborted) {
ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
- lip->li_ops->iop_unpin(lip, 1);
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 1);
continue;
}
@@ -882,7 +888,8 @@ xfs_trans_committed_bulk(
xfs_trans_ail_update(ailp, lip, item_lsn);
else
spin_unlock(&ailp->ail_lock);
- lip->li_ops->iop_unpin(lip, 0);
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 0);
continue;
}
@@ -998,7 +1005,7 @@ out_unreserve:
tp->t_ticket = NULL;
}
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
- xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
+ xfs_trans_free_items(tp, !!error);
xfs_trans_free(tp);
XFS_STATS_INC(mp, xs_trans_empty);
@@ -1060,7 +1067,7 @@ xfs_trans_cancel(
/* mark this thread as no longer being in a transaction */
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
- xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
+ xfs_trans_free_items(tp, dirty);
xfs_trans_free(tp);
}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6e1c5704a8c..64d7f171ebd3 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -27,7 +27,7 @@ struct xfs_cud_log_item;
struct xfs_bui_log_item;
struct xfs_bud_log_item;
-typedef struct xfs_log_item {
+struct xfs_log_item {
struct list_head li_ail; /* AIL pointers */
struct list_head li_trans; /* transaction list */
xfs_lsn_t li_lsn; /* last on-disk lsn */
@@ -48,7 +48,7 @@ typedef struct xfs_log_item {
struct xfs_log_vec *li_lv; /* active log vector */
struct xfs_log_vec *li_lv_shadow; /* standby vector */
xfs_lsn_t li_seq; /* CIL commit seq */
-} xfs_log_item_t;
+};
/*
* li_flags use the (set/test/clear)_bit atomic interfaces because updates can
@@ -67,17 +67,24 @@ typedef struct xfs_log_item {
{ (1 << XFS_LI_DIRTY), "DIRTY" }
struct xfs_item_ops {
- void (*iop_size)(xfs_log_item_t *, int *, int *);
- void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *);
- void (*iop_pin)(xfs_log_item_t *);
- void (*iop_unpin)(xfs_log_item_t *, int remove);
+ unsigned flags;
+ void (*iop_size)(struct xfs_log_item *, int *, int *);
+ void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *);
+ void (*iop_pin)(struct xfs_log_item *);
+ void (*iop_unpin)(struct xfs_log_item *, int remove);
uint (*iop_push)(struct xfs_log_item *, struct list_head *);
- void (*iop_unlock)(xfs_log_item_t *);
- xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
- void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
- void (*iop_error)(xfs_log_item_t *, xfs_buf_t *);
+ void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
+ void (*iop_release)(struct xfs_log_item *);
+ xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
+ void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
};
+/*
+ * Release the log item as soon as committed. This is for items just logging
+ * intents that never need to be written back in place.
+ */
+#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0)
+
void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
int type, const struct xfs_item_ops *ops);
@@ -203,7 +210,7 @@ xfs_trans_read_buf(
flags, bpp, ops);
}
-struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
+struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *);
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
@@ -223,14 +230,6 @@ void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
bool xfs_trans_buf_is_dirty(struct xfs_buf *bp);
void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
-struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *,
- struct xfs_efi_log_item *,
- uint);
-int xfs_trans_free_extent(struct xfs_trans *,
- struct xfs_efd_log_item *, xfs_fsblock_t,
- xfs_extlen_t,
- const struct xfs_owner_info *,
- bool);
int xfs_trans_commit(struct xfs_trans *);
int xfs_trans_roll(struct xfs_trans **);
int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
@@ -245,37 +244,4 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
extern kmem_zone_t *xfs_trans_zone;
-/* rmap updates */
-enum xfs_rmap_intent_type;
-
-struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
- struct xfs_rui_log_item *ruip);
-int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
- struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
- uint64_t owner, int whichfork, xfs_fileoff_t startoff,
- xfs_fsblock_t startblock, xfs_filblks_t blockcount,
- xfs_exntst_t state, struct xfs_btree_cur **pcur);
-
-/* refcount updates */
-enum xfs_refcount_intent_type;
-
-struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp,
- struct xfs_cui_log_item *cuip);
-int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
- struct xfs_cud_log_item *cudp,
- enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
- xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
- xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
-
-/* mapping updates */
-enum xfs_bmap_intent_type;
-
-struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp,
- struct xfs_bui_log_item *buip);
-int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
- struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type,
- struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff,
- xfs_fsblock_t startblock, xfs_filblks_t *blockcount,
- xfs_exntst_t state);
-
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index d3a4e89bf4a0..6ccfd75d3c24 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -6,6 +6,7 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -74,29 +75,29 @@ xfs_ail_check(
* Return a pointer to the last item in the AIL. If the AIL is empty, then
* return NULL.
*/
-static xfs_log_item_t *
+static struct xfs_log_item *
xfs_ail_max(
struct xfs_ail *ailp)
{
if (list_empty(&ailp->ail_head))
return NULL;
- return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail);
+ return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail);
}
/*
* Return a pointer to the item which follows the given item in the AIL. If
* the given item is the last item in the list, then return NULL.
*/
-static xfs_log_item_t *
+static struct xfs_log_item *
xfs_ail_next(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip)
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
{
if (lip->li_ail.next == &ailp->ail_head)
return NULL;
- return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
+ return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail);
}
/*
@@ -109,10 +110,10 @@ xfs_ail_next(
*/
xfs_lsn_t
xfs_ail_min_lsn(
- struct xfs_ail *ailp)
+ struct xfs_ail *ailp)
{
- xfs_lsn_t lsn = 0;
- xfs_log_item_t *lip;
+ xfs_lsn_t lsn = 0;
+ struct xfs_log_item *lip;
spin_lock(&ailp->ail_lock);
lip = xfs_ail_min(ailp);
@@ -128,10 +129,10 @@ xfs_ail_min_lsn(
*/
static xfs_lsn_t
xfs_ail_max_lsn(
- struct xfs_ail *ailp)
+ struct xfs_ail *ailp)
{
- xfs_lsn_t lsn = 0;
- xfs_log_item_t *lip;
+ xfs_lsn_t lsn = 0;
+ struct xfs_log_item *lip;
spin_lock(&ailp->ail_lock);
lip = xfs_ail_max(ailp);
@@ -216,13 +217,13 @@ xfs_trans_ail_cursor_clear(
* ascending traversal. Pass a @lsn of zero to initialise the cursor to the
* first item in the AIL. Returns NULL if the list is empty.
*/
-xfs_log_item_t *
+struct xfs_log_item *
xfs_trans_ail_cursor_first(
struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
xfs_lsn_t lsn)
{
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
xfs_trans_ail_cursor_init(ailp, cur);
@@ -248,7 +249,7 @@ __xfs_trans_ail_cursor_last(
struct xfs_ail *ailp,
xfs_lsn_t lsn)
{
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
@@ -327,8 +328,8 @@ xfs_ail_splice(
*/
static void
xfs_ail_delete(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip)
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
{
xfs_ail_check(ailp, lip);
list_del(&lip->li_ail);
@@ -347,6 +348,14 @@ xfsaild_push_item(
if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
return XFS_ITEM_PINNED;
+ /*
+ * Consider the item pinned if a push callback is not defined so the
+ * caller will force the log. This should only happen for intent items
+ * as they are unpinned once the associated done item is committed to
+ * the on-disk log.
+ */
+ if (!lip->li_ops->iop_push)
+ return XFS_ITEM_PINNED;
return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
}
@@ -356,7 +365,7 @@ xfsaild_push(
{
xfs_mount_t *mp = ailp->ail_mount;
struct xfs_ail_cursor cur;
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
xfs_lsn_t lsn;
xfs_lsn_t target;
long tout;
@@ -611,10 +620,10 @@ xfsaild(
*/
void
xfs_ail_push(
- struct xfs_ail *ailp,
- xfs_lsn_t threshold_lsn)
+ struct xfs_ail *ailp,
+ xfs_lsn_t threshold_lsn)
{
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
lip = xfs_ail_min(ailp);
if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
@@ -699,7 +708,7 @@ xfs_trans_ail_update_bulk(
int nr_items,
xfs_lsn_t lsn) __releases(ailp->ail_lock)
{
- xfs_log_item_t *mlip;
+ struct xfs_log_item *mlip;
int mlip_changed = 0;
int i;
LIST_HEAD(tmp);
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
deleted file mode 100644
index e1c7d55b32c3..000000000000
--- a/fs/xfs/xfs_trans_bmap.c
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_bmap_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_inode.h"
-
-/*
- * This routine is called to allocate a "bmap update done"
- * log item.
- */
-struct xfs_bud_log_item *
-xfs_trans_get_bud(
- struct xfs_trans *tp,
- struct xfs_bui_log_item *buip)
-{
- struct xfs_bud_log_item *budp;
-
- budp = xfs_bud_init(tp->t_mountp, buip);
- xfs_trans_add_item(tp, &budp->bud_item);
- return budp;
-}
-
-/*
- * Finish an bmap update and log it to the BUD. Note that the
- * transaction is marked dirty regardless of whether the bmap update
- * succeeds or fails to support the BUI/BUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_bmap_update(
- struct xfs_trans *tp,
- struct xfs_bud_log_item *budp,
- enum xfs_bmap_intent_type type,
- struct xfs_inode *ip,
- int whichfork,
- xfs_fileoff_t startoff,
- xfs_fsblock_t startblock,
- xfs_filblks_t *blockcount,
- xfs_exntst_t state)
-{
- int error;
-
- error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
- startblock, blockcount, state);
-
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the BUI and frees the BUD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
-
- return error;
-}
-
-/* Sort bmap intents by inode. */
-static int
-xfs_bmap_update_diff_items(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_bmap_intent *ba;
- struct xfs_bmap_intent *bb;
-
- ba = container_of(a, struct xfs_bmap_intent, bi_list);
- bb = container_of(b, struct xfs_bmap_intent, bi_list);
- return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
-}
-
-/* Get an BUI. */
-STATIC void *
-xfs_bmap_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_bui_log_item *buip;
-
- ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
- ASSERT(tp != NULL);
-
- buip = xfs_bui_init(tp->t_mountp);
- ASSERT(buip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &buip->bui_item);
- return buip;
-}
-
-/* Set the map extent flags for this mapping. */
-static void
-xfs_trans_set_bmap_flags(
- struct xfs_map_extent *bmap,
- enum xfs_bmap_intent_type type,
- int whichfork,
- xfs_exntst_t state)
-{
- bmap->me_flags = 0;
- switch (type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- bmap->me_flags = type;
- break;
- default:
- ASSERT(0);
- }
- if (state == XFS_EXT_UNWRITTEN)
- bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
- if (whichfork == XFS_ATTR_FORK)
- bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
-}
-
-/* Log bmap updates in the intent item. */
-STATIC void
-xfs_bmap_update_log_item(
- struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
-{
- struct xfs_bui_log_item *buip = intent;
- struct xfs_bmap_intent *bmap;
- uint next_extent;
- struct xfs_map_extent *map;
-
- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
-
- /*
- * atomic_inc_return gives us the value after the increment;
- * we want to use it as an array index so we need to subtract 1 from
- * it.
- */
- next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
- ASSERT(next_extent < buip->bui_format.bui_nextents);
- map = &buip->bui_format.bui_extents[next_extent];
- map->me_owner = bmap->bi_owner->i_ino;
- map->me_startblock = bmap->bi_bmap.br_startblock;
- map->me_startoff = bmap->bi_bmap.br_startoff;
- map->me_len = bmap->bi_bmap.br_blockcount;
- xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
- bmap->bi_bmap.br_state);
-}
-
-/* Get an BUD so we can process all the deferred rmap updates. */
-STATIC void *
-xfs_bmap_update_create_done(
- struct xfs_trans *tp,
- void *intent,
- unsigned int count)
-{
- return xfs_trans_get_bud(tp, intent);
-}
-
-/* Process a deferred rmap update. */
-STATIC int
-xfs_bmap_update_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_bmap_intent *bmap;
- xfs_filblks_t count;
- int error;
-
- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
- count = bmap->bi_bmap.br_blockcount;
- error = xfs_trans_log_finish_bmap_update(tp, done_item,
- bmap->bi_type,
- bmap->bi_owner, bmap->bi_whichfork,
- bmap->bi_bmap.br_startoff,
- bmap->bi_bmap.br_startblock,
- &count,
- bmap->bi_bmap.br_state);
- if (!error && count > 0) {
- ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
- bmap->bi_bmap.br_blockcount = count;
- return -EAGAIN;
- }
- kmem_free(bmap);
- return error;
-}
-
-/* Abort all pending BUIs. */
-STATIC void
-xfs_bmap_update_abort_intent(
- void *intent)
-{
- xfs_bui_release(intent);
-}
-
-/* Cancel a deferred rmap update. */
-STATIC void
-xfs_bmap_update_cancel_item(
- struct list_head *item)
-{
- struct xfs_bmap_intent *bmap;
-
- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
- kmem_free(bmap);
-}
-
-const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
- .max_items = XFS_BUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_bmap_update_diff_items,
- .create_intent = xfs_bmap_update_create_intent,
- .abort_intent = xfs_bmap_update_abort_intent,
- .log_item = xfs_bmap_update_log_item,
- .create_done = xfs_bmap_update_create_done,
- .finish_item = xfs_bmap_update_finish_item,
- .cancel_item = xfs_bmap_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 7d65ebf1e847..b5b3a78ef31c 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -10,11 +10,9 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
-#include "xfs_error.h"
#include "xfs_trace.h"
/*
@@ -174,8 +172,7 @@ xfs_trans_get_buf_map(
xfs_buf_t *
xfs_trans_getsb(
xfs_trans_t *tp,
- struct xfs_mount *mp,
- int flags)
+ struct xfs_mount *mp)
{
xfs_buf_t *bp;
struct xfs_buf_log_item *bip;
@@ -185,7 +182,7 @@ xfs_trans_getsb(
* if tp is NULL.
*/
if (tp == NULL)
- return xfs_getsb(mp, flags);
+ return xfs_getsb(mp);
/*
* If the superblock buffer already has this transaction
@@ -203,7 +200,7 @@ xfs_trans_getsb(
return bp;
}
- bp = xfs_getsb(mp, flags);
+ bp = xfs_getsb(mp);
if (bp == NULL)
return NULL;
@@ -428,7 +425,7 @@ xfs_trans_brelse(
/*
* Mark the buffer as not needing to be unlocked when the buf item's
- * iop_unlock() routine is called. The buffer must already be locked
+ * iop_committing() routine is called. The buffer must already be locked
* and associated with the given transaction.
*/
/* ARGSUSED */
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index cd664a03613f..1027c9ca6eb8 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -11,7 +11,6 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_quota.h"
@@ -29,7 +28,6 @@ xfs_trans_dqjoin(
xfs_trans_t *tp,
xfs_dquot_t *dqp)
{
- ASSERT(dqp->q_transp != tp);
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(dqp->q_logitem.qli_dquot == dqp);
@@ -37,15 +35,8 @@ xfs_trans_dqjoin(
* Get a log_item_desc to point at the new item.
*/
xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
- /*
- * Initialize d_transp so we can later determine if this dquot is
- * associated with this transaction.
- */
- dqp->q_transp = tp;
}
-
/*
* This is called to mark the dquot as needing
* to be logged when the transaction is committed. The dquot must
@@ -61,7 +52,6 @@ xfs_trans_log_dquot(
xfs_trans_t *tp,
xfs_dquot_t *dqp)
{
- ASSERT(dqp->q_transp == tp);
ASSERT(XFS_DQ_IS_LOCKED(dqp));
tp->t_flags |= XFS_TRANS_DIRTY;
@@ -347,7 +337,6 @@ xfs_trans_apply_dquot_deltas(
break;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(dqp->q_transp == tp);
/*
* adjust the actual number of blocks used
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
deleted file mode 100644
index 8ee7a3f8bb20..000000000000
--- a/fs/xfs/xfs_trans_extfree.c
+++ /dev/null
@@ -1,286 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_extfree_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_trace.h"
-
-/*
- * This routine is called to allocate an "extent free done"
- * log item that will hold nextents worth of extents. The
- * caller must use all nextents extents, because we are not
- * flexible about this at all.
- */
-struct xfs_efd_log_item *
-xfs_trans_get_efd(struct xfs_trans *tp,
- struct xfs_efi_log_item *efip,
- uint nextents)
-{
- struct xfs_efd_log_item *efdp;
-
- ASSERT(tp != NULL);
- ASSERT(nextents > 0);
-
- efdp = xfs_efd_init(tp->t_mountp, efip, nextents);
- ASSERT(efdp != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &efdp->efd_item);
- return efdp;
-}
-
-/*
- * Free an extent and log it to the EFD. Note that the transaction is marked
- * dirty regardless of whether the extent free succeeds or fails to support the
- * EFI/EFD lifecycle rules.
- */
-int
-xfs_trans_free_extent(
- struct xfs_trans *tp,
- struct xfs_efd_log_item *efdp,
- xfs_fsblock_t start_block,
- xfs_extlen_t ext_len,
- const struct xfs_owner_info *oinfo,
- bool skip_discard)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_extent *extp;
- uint next_extent;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
- start_block);
- int error;
-
- trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
-
- error = __xfs_free_extent(tp, start_block, ext_len,
- oinfo, XFS_AG_RESV_NONE, skip_discard);
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the EFI and frees the EFD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
-
- next_extent = efdp->efd_next_extent;
- ASSERT(next_extent < efdp->efd_format.efd_nextents);
- extp = &(efdp->efd_format.efd_extents[next_extent]);
- extp->ext_start = start_block;
- extp->ext_len = ext_len;
- efdp->efd_next_extent++;
-
- return error;
-}
-
-/* Sort bmap items by AG. */
-static int
-xfs_extent_free_diff_items(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_mount *mp = priv;
- struct xfs_extent_free_item *ra;
- struct xfs_extent_free_item *rb;
-
- ra = container_of(a, struct xfs_extent_free_item, xefi_list);
- rb = container_of(b, struct xfs_extent_free_item, xefi_list);
- return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
- XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
-}
-
-/* Get an EFI. */
-STATIC void *
-xfs_extent_free_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_efi_log_item *efip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- efip = xfs_efi_init(tp->t_mountp, count);
- ASSERT(efip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &efip->efi_item);
- return efip;
-}
-
-/* Log a free extent to the intent item. */
-STATIC void
-xfs_extent_free_log_item(
- struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
-{
- struct xfs_efi_log_item *efip = intent;
- struct xfs_extent_free_item *free;
- uint next_extent;
- struct xfs_extent *extp;
-
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
-
- /*
- * atomic_inc_return gives us the value after the increment;
- * we want to use it as an array index so we need to subtract 1 from
- * it.
- */
- next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
- ASSERT(next_extent < efip->efi_format.efi_nextents);
- extp = &efip->efi_format.efi_extents[next_extent];
- extp->ext_start = free->xefi_startblock;
- extp->ext_len = free->xefi_blockcount;
-}
-
-/* Get an EFD so we can process all the free extents. */
-STATIC void *
-xfs_extent_free_create_done(
- struct xfs_trans *tp,
- void *intent,
- unsigned int count)
-{
- return xfs_trans_get_efd(tp, intent, count);
-}
-
-/* Process a free extent. */
-STATIC int
-xfs_extent_free_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_extent_free_item *free;
- int error;
-
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- error = xfs_trans_free_extent(tp, done_item,
- free->xefi_startblock,
- free->xefi_blockcount,
- &free->xefi_oinfo, free->xefi_skip_discard);
- kmem_free(free);
- return error;
-}
-
-/* Abort all pending EFIs. */
-STATIC void
-xfs_extent_free_abort_intent(
- void *intent)
-{
- xfs_efi_release(intent);
-}
-
-/* Cancel a free extent. */
-STATIC void
-xfs_extent_free_cancel_item(
- struct list_head *item)
-{
- struct xfs_extent_free_item *free;
-
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- kmem_free(free);
-}
-
-const struct xfs_defer_op_type xfs_extent_free_defer_type = {
- .max_items = XFS_EFI_MAX_FAST_EXTENTS,
- .diff_items = xfs_extent_free_diff_items,
- .create_intent = xfs_extent_free_create_intent,
- .abort_intent = xfs_extent_free_abort_intent,
- .log_item = xfs_extent_free_log_item,
- .create_done = xfs_extent_free_create_done,
- .finish_item = xfs_extent_free_finish_item,
- .cancel_item = xfs_extent_free_cancel_item,
-};
-
-/*
- * AGFL blocks are accounted differently in the reserve pools and are not
- * inserted into the busy extent list.
- */
-STATIC int
-xfs_agfl_free_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_efd_log_item *efdp = done_item;
- struct xfs_extent_free_item *free;
- struct xfs_extent *extp;
- struct xfs_buf *agbp;
- int error;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- uint next_extent;
-
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- ASSERT(free->xefi_blockcount == 1);
- agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
- agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
-
- trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
-
- error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
- if (!error)
- error = xfs_free_agfl_block(tp, agno, agbno, agbp,
- &free->xefi_oinfo);
-
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the EFI and frees the EFD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
-
- next_extent = efdp->efd_next_extent;
- ASSERT(next_extent < efdp->efd_format.efd_nextents);
- extp = &(efdp->efd_format.efd_extents[next_extent]);
- extp->ext_start = free->xefi_startblock;
- extp->ext_len = free->xefi_blockcount;
- efdp->efd_next_extent++;
-
- kmem_free(free);
- return error;
-}
-
-
-/* sub-type with special handling for AGFL deferred frees */
-const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
- .max_items = XFS_EFI_MAX_FAST_EXTENTS,
- .diff_items = xfs_extent_free_diff_items,
- .create_intent = xfs_extent_free_create_intent,
- .abort_intent = xfs_extent_free_abort_intent,
- .log_item = xfs_extent_free_log_item,
- .create_done = xfs_extent_free_create_done,
- .finish_item = xfs_agfl_free_finish_item,
- .cancel_item = xfs_extent_free_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 542927321a61..93d14e47269d 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -8,13 +8,10 @@
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
-#include "xfs_trace.h"
#include <linux/iversion.h>
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 091eae9f4e74..2e073c1c4614 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -16,12 +16,10 @@ struct xfs_log_vec;
void xfs_trans_init(struct xfs_mount *);
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
-void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
- bool abort);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
- xfs_lsn_t commit_lsn, int aborted);
+ xfs_lsn_t commit_lsn, bool aborted);
/*
* AIL traversal cursor.
*
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c
deleted file mode 100644
index 8d734728dd1b..000000000000
--- a/fs/xfs/xfs_trans_refcount.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_refcount_item.h"
-#include "xfs_alloc.h"
-#include "xfs_refcount.h"
-
-/*
- * This routine is called to allocate a "refcount update done"
- * log item.
- */
-struct xfs_cud_log_item *
-xfs_trans_get_cud(
- struct xfs_trans *tp,
- struct xfs_cui_log_item *cuip)
-{
- struct xfs_cud_log_item *cudp;
-
- cudp = xfs_cud_init(tp->t_mountp, cuip);
- xfs_trans_add_item(tp, &cudp->cud_item);
- return cudp;
-}
-
-/*
- * Finish an refcount update and log it to the CUD. Note that the
- * transaction is marked dirty regardless of whether the refcount
- * update succeeds or fails to support the CUI/CUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_refcount_update(
- struct xfs_trans *tp,
- struct xfs_cud_log_item *cudp,
- enum xfs_refcount_intent_type type,
- xfs_fsblock_t startblock,
- xfs_extlen_t blockcount,
- xfs_fsblock_t *new_fsb,
- xfs_extlen_t *new_len,
- struct xfs_btree_cur **pcur)
-{
- int error;
-
- error = xfs_refcount_finish_one(tp, type, startblock,
- blockcount, new_fsb, new_len, pcur);
-
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the CUI and frees the CUD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
-
- return error;
-}
-
-/* Sort refcount intents by AG. */
-static int
-xfs_refcount_update_diff_items(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_mount *mp = priv;
- struct xfs_refcount_intent *ra;
- struct xfs_refcount_intent *rb;
-
- ra = container_of(a, struct xfs_refcount_intent, ri_list);
- rb = container_of(b, struct xfs_refcount_intent, ri_list);
- return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
- XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
-}
-
-/* Get an CUI. */
-STATIC void *
-xfs_refcount_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_cui_log_item *cuip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- cuip = xfs_cui_init(tp->t_mountp, count);
- ASSERT(cuip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &cuip->cui_item);
- return cuip;
-}
-
-/* Set the phys extent flags for this reverse mapping. */
-static void
-xfs_trans_set_refcount_flags(
- struct xfs_phys_extent *refc,
- enum xfs_refcount_intent_type type)
-{
- refc->pe_flags = 0;
- switch (type) {
- case XFS_REFCOUNT_INCREASE:
- case XFS_REFCOUNT_DECREASE:
- case XFS_REFCOUNT_ALLOC_COW:
- case XFS_REFCOUNT_FREE_COW:
- refc->pe_flags |= type;
- break;
- default:
- ASSERT(0);
- }
-}
-
-/* Log refcount updates in the intent item. */
-STATIC void
-xfs_refcount_update_log_item(
- struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
-{
- struct xfs_cui_log_item *cuip = intent;
- struct xfs_refcount_intent *refc;
- uint next_extent;
- struct xfs_phys_extent *ext;
-
- refc = container_of(item, struct xfs_refcount_intent, ri_list);
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
-
- /*
- * atomic_inc_return gives us the value after the increment;
- * we want to use it as an array index so we need to subtract 1 from
- * it.
- */
- next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
- ASSERT(next_extent < cuip->cui_format.cui_nextents);
- ext = &cuip->cui_format.cui_extents[next_extent];
- ext->pe_startblock = refc->ri_startblock;
- ext->pe_len = refc->ri_blockcount;
- xfs_trans_set_refcount_flags(ext, refc->ri_type);
-}
-
-/* Get an CUD so we can process all the deferred refcount updates. */
-STATIC void *
-xfs_refcount_update_create_done(
- struct xfs_trans *tp,
- void *intent,
- unsigned int count)
-{
- return xfs_trans_get_cud(tp, intent);
-}
-
-/* Process a deferred refcount update. */
-STATIC int
-xfs_refcount_update_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_refcount_intent *refc;
- xfs_fsblock_t new_fsb;
- xfs_extlen_t new_aglen;
- int error;
-
- refc = container_of(item, struct xfs_refcount_intent, ri_list);
- error = xfs_trans_log_finish_refcount_update(tp, done_item,
- refc->ri_type,
- refc->ri_startblock,
- refc->ri_blockcount,
- &new_fsb, &new_aglen,
- (struct xfs_btree_cur **)state);
- /* Did we run out of reservation? Requeue what we didn't finish. */
- if (!error && new_aglen > 0) {
- ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
- refc->ri_type == XFS_REFCOUNT_DECREASE);
- refc->ri_startblock = new_fsb;
- refc->ri_blockcount = new_aglen;
- return -EAGAIN;
- }
- kmem_free(refc);
- return error;
-}
-
-/* Clean up after processing deferred refcounts. */
-STATIC void
-xfs_refcount_update_finish_cleanup(
- struct xfs_trans *tp,
- void *state,
- int error)
-{
- struct xfs_btree_cur *rcur = state;
-
- xfs_refcount_finish_one_cleanup(tp, rcur, error);
-}
-
-/* Abort all pending CUIs. */
-STATIC void
-xfs_refcount_update_abort_intent(
- void *intent)
-{
- xfs_cui_release(intent);
-}
-
-/* Cancel a deferred refcount update. */
-STATIC void
-xfs_refcount_update_cancel_item(
- struct list_head *item)
-{
- struct xfs_refcount_intent *refc;
-
- refc = container_of(item, struct xfs_refcount_intent, ri_list);
- kmem_free(refc);
-}
-
-const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
- .max_items = XFS_CUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_refcount_update_diff_items,
- .create_intent = xfs_refcount_update_create_intent,
- .abort_intent = xfs_refcount_update_abort_intent,
- .log_item = xfs_refcount_update_log_item,
- .create_done = xfs_refcount_update_create_done,
- .finish_item = xfs_refcount_update_finish_item,
- .finish_cleanup = xfs_refcount_update_finish_cleanup,
- .cancel_item = xfs_refcount_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
deleted file mode 100644
index 5c7936b1be13..000000000000
--- a/fs/xfs/xfs_trans_rmap.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_rmap_item.h"
-#include "xfs_alloc.h"
-#include "xfs_rmap.h"
-
-/* Set the map extent flags for this reverse mapping. */
-static void
-xfs_trans_set_rmap_flags(
- struct xfs_map_extent *rmap,
- enum xfs_rmap_intent_type type,
- int whichfork,
- xfs_exntst_t state)
-{
- rmap->me_flags = 0;
- if (state == XFS_EXT_UNWRITTEN)
- rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
- if (whichfork == XFS_ATTR_FORK)
- rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
- switch (type) {
- case XFS_RMAP_MAP:
- rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
- break;
- case XFS_RMAP_MAP_SHARED:
- rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
- break;
- case XFS_RMAP_UNMAP:
- rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
- break;
- case XFS_RMAP_UNMAP_SHARED:
- rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
- break;
- case XFS_RMAP_CONVERT:
- rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
- break;
- case XFS_RMAP_CONVERT_SHARED:
- rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
- break;
- case XFS_RMAP_ALLOC:
- rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
- break;
- case XFS_RMAP_FREE:
- rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
- break;
- default:
- ASSERT(0);
- }
-}
-
-struct xfs_rud_log_item *
-xfs_trans_get_rud(
- struct xfs_trans *tp,
- struct xfs_rui_log_item *ruip)
-{
- struct xfs_rud_log_item *rudp;
-
- rudp = xfs_rud_init(tp->t_mountp, ruip);
- xfs_trans_add_item(tp, &rudp->rud_item);
- return rudp;
-}
-
-/*
- * Finish an rmap update and log it to the RUD. Note that the transaction is
- * marked dirty regardless of whether the rmap update succeeds or fails to
- * support the RUI/RUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_rmap_update(
- struct xfs_trans *tp,
- struct xfs_rud_log_item *rudp,
- enum xfs_rmap_intent_type type,
- uint64_t owner,
- int whichfork,
- xfs_fileoff_t startoff,
- xfs_fsblock_t startblock,
- xfs_filblks_t blockcount,
- xfs_exntst_t state,
- struct xfs_btree_cur **pcur)
-{
- int error;
-
- error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
- startblock, blockcount, state, pcur);
-
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the RUI and frees the RUD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
-
- return error;
-}
-
-/* Sort rmap intents by AG. */
-static int
-xfs_rmap_update_diff_items(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_mount *mp = priv;
- struct xfs_rmap_intent *ra;
- struct xfs_rmap_intent *rb;
-
- ra = container_of(a, struct xfs_rmap_intent, ri_list);
- rb = container_of(b, struct xfs_rmap_intent, ri_list);
- return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
- XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
-}
-
-/* Get an RUI. */
-STATIC void *
-xfs_rmap_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_rui_log_item *ruip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- ruip = xfs_rui_init(tp->t_mountp, count);
- ASSERT(ruip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &ruip->rui_item);
- return ruip;
-}
-
-/* Log rmap updates in the intent item. */
-STATIC void
-xfs_rmap_update_log_item(
- struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
-{
- struct xfs_rui_log_item *ruip = intent;
- struct xfs_rmap_intent *rmap;
- uint next_extent;
- struct xfs_map_extent *map;
-
- rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
-
- /*
- * atomic_inc_return gives us the value after the increment;
- * we want to use it as an array index so we need to subtract 1 from
- * it.
- */
- next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
- ASSERT(next_extent < ruip->rui_format.rui_nextents);
- map = &ruip->rui_format.rui_extents[next_extent];
- map->me_owner = rmap->ri_owner;
- map->me_startblock = rmap->ri_bmap.br_startblock;
- map->me_startoff = rmap->ri_bmap.br_startoff;
- map->me_len = rmap->ri_bmap.br_blockcount;
- xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
- rmap->ri_bmap.br_state);
-}
-
-/* Get an RUD so we can process all the deferred rmap updates. */
-STATIC void *
-xfs_rmap_update_create_done(
- struct xfs_trans *tp,
- void *intent,
- unsigned int count)
-{
- return xfs_trans_get_rud(tp, intent);
-}
-
-/* Process a deferred rmap update. */
-STATIC int
-xfs_rmap_update_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_rmap_intent *rmap;
- int error;
-
- rmap = container_of(item, struct xfs_rmap_intent, ri_list);
- error = xfs_trans_log_finish_rmap_update(tp, done_item,
- rmap->ri_type,
- rmap->ri_owner, rmap->ri_whichfork,
- rmap->ri_bmap.br_startoff,
- rmap->ri_bmap.br_startblock,
- rmap->ri_bmap.br_blockcount,
- rmap->ri_bmap.br_state,
- (struct xfs_btree_cur **)state);
- kmem_free(rmap);
- return error;
-}
-
-/* Clean up after processing deferred rmaps. */
-STATIC void
-xfs_rmap_update_finish_cleanup(
- struct xfs_trans *tp,
- void *state,
- int error)
-{
- struct xfs_btree_cur *rcur = state;
-
- xfs_rmap_finish_one_cleanup(tp, rcur, error);
-}
-
-/* Abort all pending RUIs. */
-STATIC void
-xfs_rmap_update_abort_intent(
- void *intent)
-{
- xfs_rui_release(intent);
-}
-
-/* Cancel a deferred rmap update. */
-STATIC void
-xfs_rmap_update_cancel_item(
- struct list_head *item)
-{
- struct xfs_rmap_intent *rmap;
-
- rmap = container_of(item, struct xfs_rmap_intent, ri_list);
- kmem_free(rmap);
-}
-
-const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
- .max_items = XFS_RUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_rmap_update_diff_items,
- .create_intent = xfs_rmap_update_create_intent,
- .abort_intent = xfs_rmap_update_abort_intent,
- .log_item = xfs_rmap_update_log_item,
- .create_done = xfs_rmap_update_create_done,
- .finish_item = xfs_rmap_update_finish_item,
- .finish_cleanup = xfs_rmap_update_finish_cleanup,
- .cancel_item = xfs_rmap_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 9a63016009a1..3123b5aaad2a 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -5,15 +5,12 @@
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>