summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2010-11-15 06:49:30 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2010-11-15 06:49:30 +0000
commitc94f28c383f58c9de74678e0f1624db9c5f8a8cb (patch)
tree3281184f026cb79cee6c20fe29c994ba654cbbe4 /fs
parentdf15315899c0641412bd54b29565a70b078a6ac8 (diff)
parent1bb95834bbcdc969e477a9284cf96c17a4c2616f (diff)
Merge branch 'drm-intel-fixes' into drm-intel-next
Conflicts: drivers/gpu/drm/i915/i915_gem.c drivers/gpu/drm/i915/intel_ringbuffer.c
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig13
-rw-r--r--fs/9p/Makefile1
-rw-r--r--fs/9p/acl.c392
-rw-r--r--fs/9p/acl.h49
-rw-r--r--fs/9p/fid.c1
-rw-r--r--fs/9p/v9fs.c22
-rw-r--r--fs/9p/v9fs.h10
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c30
-rw-r--r--fs/9p/vfs_dir.c4
-rw-r--r--fs/9p/vfs_file.c265
-rw-r--r--fs/9p/vfs_inode.c253
-rw-r--r--fs/9p/vfs_super.c36
-rw-r--r--fs/9p/xattr.c52
-rw-r--r--fs/9p/xattr.h6
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Kconfig.binfmt4
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/super.c9
-rw-r--r--fs/affs/super.c9
-rw-r--r--fs/afs/super.c19
-rw-r--r--fs/anon_inodes.c10
-rw-r--r--fs/autofs/Kconfig22
-rw-r--r--fs/autofs/Makefile7
-rw-r--r--fs/autofs/autofs_i.h165
-rw-r--r--fs/autofs/dirhash.c250
-rw-r--r--fs/autofs/init.c52
-rw-r--r--fs/autofs/inode.c288
-rw-r--r--fs/autofs/root.c645
-rw-r--r--fs/autofs/symlink.c26
-rw-r--r--fs/autofs/waitq.c205
-rw-r--r--fs/autofs4/init.c8
-rw-r--r--fs/befs/linuxvfs.c11
-rw-r--r--fs/bfs/inode.c8
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/bio.c23
-rw-r--r--fs/block_dev.c8
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c57
-rw-r--r--fs/btrfs/ctree.h100
-rw-r--r--fs/btrfs/dir-item.c2
-rw-r--r--fs/btrfs/disk-io.c32
-rw-r--r--fs/btrfs/extent-tree.c694
-rw-r--r--fs/btrfs/extent_io.c168
-rw-r--r--fs/btrfs/extent_io.h4
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/free-space-cache.c751
-rw-r--r--fs/btrfs/free-space-cache.h18
-rw-r--r--fs/btrfs/inode.c202
-rw-r--r--fs/btrfs/ioctl.c398
-rw-r--r--fs/btrfs/ioctl.h13
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/relocation.c109
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/super.c57
-rw-r--r--fs/btrfs/transaction.c234
-rw-r--r--fs/btrfs/transaction.h8
-rw-r--r--fs/btrfs/tree-defrag.c2
-rw-r--r--fs/btrfs/tree-log.c17
-rw-r--r--fs/btrfs/volumes.c7
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/btrfs/zlib.c5
-rw-r--r--fs/ceph/super.c50
-rw-r--r--fs/cifs/Kconfig3
-rw-r--r--fs/cifs/TODO2
-rw-r--r--fs/cifs/cifs_fs_sb.h6
-rw-r--r--fs/cifs/cifsencrypt.c427
-rw-r--r--fs/cifs/cifsfs.c21
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h58
-rw-r--r--fs/cifs/cifspdu.h13
-rw-r--r--fs/cifs/cifsproto.h15
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c246
-rw-r--r--fs/cifs/file.c129
-rw-r--r--fs/cifs/inode.c16
-rw-r--r--fs/cifs/ioctl.c16
-rw-r--r--fs/cifs/misc.c27
-rw-r--r--fs/cifs/sess.c166
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coda/inode.c8
-rw-r--r--fs/compat.c44
-rw-r--r--fs/compat_ioctl.c29
-rw-r--r--fs/configfs/mount.c8
-rw-r--r--fs/cramfs/inode.c9
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/devpts/inode.c32
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c11
-rw-r--r--fs/ecryptfs/keystore.c45
-rw-r--r--fs/ecryptfs/main.c20
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/efs/super.c8
-rw-r--r--fs/eventpoll.c35
-rw-r--r--fs/exec.c168
-rw-r--r--fs/exofs/super.c10
-rw-r--r--fs/ext2/balloc.c3
-rw-r--r--fs/ext2/super.c8
-rw-r--r--fs/ext3/balloc.c17
-rw-r--r--fs/ext3/ialloc.c11
-rw-r--r--fs/ext3/inode.c20
-rw-r--r--fs/ext3/resize.c13
-rw-r--r--fs/ext3/super.c49
-rw-r--r--fs/ext4/Makefile2
-rw-r--r--fs/ext4/balloc.c5
-rw-r--r--fs/ext4/block_validity.c7
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/ext4/ext4.h112
-rw-r--r--fs/ext4/ext4_extents.h65
-rw-r--r--fs/ext4/extents.c368
-rw-r--r--fs/ext4/file.c44
-rw-r--r--fs/ext4/fsync.c83
-rw-r--r--fs/ext4/ialloc.c135
-rw-r--r--fs/ext4/inode.c590
-rw-r--r--fs/ext4/mballoc.c552
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/move_extent.c22
-rw-r--r--fs/ext4/namei.c63
-rw-r--r--fs/ext4/page-io.c431
-rw-r--r--fs/ext4/resize.c52
-rw-r--r--fs/ext4/super.c619
-rw-r--r--fs/ext4/xattr.c4
-rw-r--r--fs/ext4/xattr.h10
-rw-r--r--fs/fat/namei_msdos.c9
-rw-r--r--fs/fat/namei_vfat.c9
-rw-r--r--fs/fcntl.c62
-rw-r--r--fs/freevxfs/vxfs_super.c9
-rw-r--r--fs/fs-writeback.c68
-rw-r--r--fs/fuse/control.c10
-rw-r--r--fs/fuse/dev.c7
-rw-r--r--fs/fuse/inode.c17
-rw-r--r--fs/gfs2/ops_fstype.c51
-rw-r--r--fs/hfs/super.c9
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/ioctl.c2
-rw-r--r--fs/hfsplus/super.c10
-rw-r--r--fs/hostfs/hostfs_kern.c8
-rw-r--r--fs/hpfs/buffer.c4
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/super.c11
-rw-r--r--fs/hppfs/hppfs.c8
-rw-r--r--fs/hugetlbfs/inode.c11
-rw-r--r--fs/internal.h2
-rw-r--r--fs/ioctl.c39
-rw-r--r--fs/ioprio.c18
-rw-r--r--fs/isofs/inode.c49
-rw-r--r--fs/jbd/checkpoint.c4
-rw-r--r--fs/jbd/commit.c8
-rw-r--r--fs/jbd/journal.c44
-rw-r--r--fs/jbd/recovery.c2
-rw-r--r--fs/jbd/transaction.c6
-rw-r--r--fs/jbd2/checkpoint.c10
-rw-r--r--fs/jbd2/commit.c12
-rw-r--r--fs/jbd2/journal.c14
-rw-r--r--fs/jbd2/transaction.c1
-rw-r--r--fs/jffs2/build.c2
-rw-r--r--fs/jffs2/compr.c6
-rw-r--r--fs/jffs2/compr.h4
-rw-r--r--fs/jffs2/compr_lzo.c4
-rw-r--r--fs/jffs2/compr_rtime.c6
-rw-r--r--fs/jffs2/compr_rubin.c11
-rw-r--r--fs/jffs2/compr_zlib.c6
-rw-r--r--fs/jffs2/dir.c3
-rw-r--r--fs/jffs2/erase.c2
-rw-r--r--fs/jffs2/fs.c22
-rw-r--r--fs/jffs2/gc.c7
-rw-r--r--fs/jffs2/jffs2_fs_sb.h1
-rw-r--r--fs/jffs2/nodelist.c8
-rw-r--r--fs/jffs2/nodelist.h3
-rw-r--r--fs/jffs2/scan.c12
-rw-r--r--fs/jffs2/super.c9
-rw-r--r--fs/jfs/super.c9
-rw-r--r--fs/libfs.c14
-rw-r--r--fs/lockd/svc.c11
-rw-r--r--fs/lockd/svclock.c6
-rw-r--r--fs/lockd/svcsubs.c9
-rw-r--r--fs/locks.c129
-rw-r--r--fs/logfs/dev_bdev.c15
-rw-r--r--fs/logfs/dev_mtd.c18
-rw-r--r--fs/logfs/logfs.h22
-rw-r--r--fs/logfs/super.c77
-rw-r--r--fs/minix/inode.c9
-rw-r--r--fs/namei.c2
-rw-r--r--fs/ncpfs/inode.c8
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/idmap.c2
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/super.c96
-rw-r--r--fs/nfs/unlink.c4
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/nfs4state.c62
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nilfs2/super.c16
-rw-r--r--fs/notify/Kconfig2
-rw-r--r--fs/notify/fanotify/fanotify.c27
-rw-r--r--fs/notify/fanotify/fanotify_user.c98
-rw-r--r--fs/notify/fsnotify.c35
-rw-r--r--fs/notify/inode_mark.c9
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/notify/vfsmount_mark.c6
-rw-r--r--fs/ntfs/super.c9
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c8
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/omfs/inode.c9
-rw-r--r--fs/open.c6
-rw-r--r--fs/openpromfs/inode.c8
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/proc/base.c8
-rw-r--r--fs/proc/root.c16
-rw-r--r--fs/proc/softirqs.c4
-rw-r--r--fs/proc/stat.c14
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/qnx4/inode.c9
-rw-r--r--fs/quota/Kconfig4
-rw-r--r--fs/quota/dquot.c30
-rw-r--r--fs/ramfs/inode.c17
-rw-r--r--fs/read_write.c62
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/romfs/super.c17
-rw-r--r--fs/select.c6
-rw-r--r--fs/smbfs/Kconfig56
-rw-r--r--fs/smbfs/Makefile18
-rw-r--r--fs/smbfs/cache.c208
-rw-r--r--fs/smbfs/dir.c696
-rw-r--r--fs/smbfs/file.c454
-rw-r--r--fs/smbfs/getopt.c64
-rw-r--r--fs/smbfs/getopt.h14
-rw-r--r--fs/smbfs/inode.c843
-rw-r--r--fs/smbfs/ioctl.c69
-rw-r--r--fs/smbfs/proc.c3503
-rw-r--r--fs/smbfs/proto.h87
-rw-r--r--fs/smbfs/request.c818
-rw-r--r--fs/smbfs/request.h70
-rw-r--r--fs/smbfs/smb_debug.h34
-rw-r--r--fs/smbfs/smbiod.c344
-rw-r--r--fs/smbfs/sock.c386
-rw-r--r--fs/smbfs/symlink.c68
-rw-r--r--fs/squashfs/super.c10
-rw-r--r--fs/squashfs/xattr.c9
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c1
-rw-r--r--fs/super.c111
-rw-r--r--fs/sysfs/mount.c32
-rw-r--r--fs/sysv/super.c17
-rw-r--r--fs/ubifs/super.c13
-rw-r--r--fs/udf/super.c9
-rw-r--r--fs/ufs/super.c8
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c1
-rw-r--r--fs/xfs/xfs_filestream.c8
-rw-r--r--fs/xfs/xfs_mount.c1
-rw-r--r--fs/xfs/xfs_quota.h20
260 files changed, 7793 insertions, 12558 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 795233702a4e..7e0511476797 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -17,3 +17,16 @@ config 9P_FSCACHE
Choose Y here to enable persistent, read-only local
caching support for 9p clients using FS-Cache
+
+config 9P_FS_POSIX_ACL
+ bool "9P POSIX Access Control Lists"
+ depends on 9P_FS
+ select FS_POSIX_ACL
+ help
+ POSIX Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 91fba025fcbe..f8ba37effd1b 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_9P_FS) := 9p.o
xattr_user.o
9p-$(CONFIG_9P_FSCACHE) += cache.o
+9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
new file mode 100644
index 000000000000..12d602351dbe
--- /dev/null
+++ b/fs/9p/acl.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/posix_acl_xattr.h>
+#include "xattr.h"
+#include "acl.h"
+#include "v9fs_vfs.h"
+#include "v9fs.h"
+
+/*
+ * Read the POSIX ACL stored in the extended attribute @name through @fid.
+ *
+ * The attribute is queried twice: once with a NULL buffer to learn its
+ * size, then again into a freshly allocated buffer of that size.
+ *
+ * Returns the value produced by posix_acl_from_xattr() when data was read,
+ * NULL when either read reports 0, -ENODATA, -ENOSYS or -EOPNOTSUPP
+ * (nothing to read), ERR_PTR(-ENOMEM) when the buffer cannot be allocated
+ * and ERR_PTR(-EIO) for any other read failure.
+ */
+static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
+{
+	ssize_t size;
+	void *value = NULL;
+	struct posix_acl *acl = NULL;
+
+	size = v9fs_fid_xattr_get(fid, name, NULL, 0);
+	if (size > 0) {
+		value = kzalloc(size, GFP_NOFS);
+		if (!value)
+			return ERR_PTR(-ENOMEM);
+		size = v9fs_fid_xattr_get(fid, name, value, size);
+		if (size > 0) {
+			/* may itself be NULL or an ERR_PTR(); passed through */
+			acl = posix_acl_from_xattr(value, size);
+			goto out;
+		}
+	}
+
+	/*
+	 * size <= 0 here, from either the probe or the real read.  Apply
+	 * the same classification to both so that a failure of the second
+	 * read is not silently reported as "no ACL".
+	 */
+	if (size != 0 && size != -ENODATA &&
+	    size != -ENOSYS && size != -EOPNOTSUPP)
+		acl = ERR_PTR(-EIO);
+out:
+	kfree(value);
+	return acl;
+}
+
+/*
+ * Populate the inode's cached default and access ACLs.
+ *
+ * When the session is not mounted with access=client, both cache slots
+ * are set to NULL and no request is made.  Otherwise both ACLs are
+ * fetched through @fid and cached.
+ *
+ * Returns 0 on success, -EIO if either ACL could not be fetched (in
+ * which case nothing is cached).
+ */
+int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
+{
+	int retval = 0;
+	struct posix_acl *pacl, *dacl;
+	struct v9fs_session_info *v9ses;
+
+	v9ses = v9fs_inode2v9ses(inode);
+	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+		set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
+		set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
+		return 0;
+	}
+	/* get the default/access acl values and cache them */
+	dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
+	pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
+
+	if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
+		set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
+		set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
+	} else
+		retval = -EIO;
+
+	/*
+	 * Drop our references on every path: when only one of the two
+	 * lookups failed, the other one still returned a counted ACL that
+	 * must not be leaked.
+	 */
+	if (!IS_ERR(dacl))
+		posix_acl_release(dacl);
+	if (!IS_ERR(pacl))
+		posix_acl_release(pacl);
+
+	return retval;
+}
+
+/*
+ * Return the cached ACL of @type for @inode, as handed back by
+ * get_cached_acl().
+ */
+static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
+{
+	struct posix_acl *cached = get_cached_acl(inode, type);
+
+	/*
+	 * 9p fills the ACL cache when the inode is instantiated
+	 * (v9fs_inode_from_fid), so an unpopulated slot is a bug.
+	 */
+	BUG_ON(cached == ACL_NOT_CACHED);
+	return cached;
+}
+
+/*
+ * Check @mask against the cached access ACL of @inode.
+ *
+ * Returns 0 without consulting any ACL when the session is not mounted
+ * with access=client, -EAGAIN when no access ACL is cached, otherwise
+ * the result of posix_acl_permission().
+ */
+int v9fs_check_acl(struct inode *inode, int mask)
+{
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	struct posix_acl *acl;
+	int error;
+
+	/*
+	 * Client-side ACL evaluation is only done for access=client.
+	 * NOTE(review): presumably the server enforces ACLs in the other
+	 * access modes -- confirm.
+	 */
+	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
+		return 0;
+
+	acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (!acl)
+		return -EAGAIN;
+
+	error = posix_acl_permission(inode, acl, mask);
+	posix_acl_release(acl);
+	return error;
+}
+
+/*
+ * Store @acl as the ACL of @type on @dentry: send it to the server as an
+ * extended attribute and, once that succeeded, update the inode's cached
+ * copy.
+ *
+ * @acl must not be NULL (its a_count is read unconditionally).
+ *
+ * Returns 0 on success, -ENOMEM if the xattr buffer cannot be allocated,
+ * or the error from posix_acl_to_xattr() / v9fs_xattr_set().
+ */
+static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
+{
+	int retval;
+	char *name;
+	size_t size;
+	void *buffer;
+	struct inode *inode = dentry->d_inode;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		break;
+	default:
+		BUG();
+	}
+
+	/* Send a setxattr request to the server */
+	size = posix_acl_xattr_size(acl->a_count);
+	buffer = kmalloc(size, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+	retval = posix_acl_to_xattr(acl, buffer, size);
+	if (retval < 0)
+		goto err_free_out;
+	retval = v9fs_xattr_set(dentry, name, buffer, size, 0);
+	/*
+	 * Only cache what the server accepted; caching first would leave
+	 * the inode with an ACL the server never stored whenever one of
+	 * the steps above fails.
+	 */
+	if (!retval)
+		set_cached_acl(inode, type, acl);
+err_free_out:
+	kfree(buffer);
+	return retval;
+}
+
+/*
+ * Re-derive the access ACL of @dentry from its inode's current mode bits.
+ *
+ * Returns -EOPNOTSUPP for symlinks, 0 when no access ACL is cached,
+ * -ENOMEM if the ACL cannot be cloned, otherwise the result of
+ * posix_acl_chmod_masq() / v9fs_set_acl().
+ */
+int v9fs_acl_chmod(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *cached, *copy;
+	int err;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	cached = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
+	if (!cached)
+		return 0;
+
+	/* work on a private copy; the cached reference is dropped at once */
+	copy = posix_acl_clone(cached, GFP_KERNEL);
+	posix_acl_release(cached);
+	if (!copy)
+		return -ENOMEM;
+
+	err = posix_acl_chmod_masq(copy, inode->i_mode);
+	if (err == 0)
+		err = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, copy);
+	posix_acl_release(copy);
+	return err;
+}
+
+/*
+ * Apply the ACLs computed for a newly created object and drop the
+ * references passed in.  Either pointer may be NULL.
+ *
+ * Always returns 0.
+ * NOTE(review): the results of v9fs_set_acl() are ignored here; confirm
+ * that this best-effort behaviour is intended.
+ */
+int v9fs_set_create_acl(struct dentry *dentry,
+			struct posix_acl *dpacl, struct posix_acl *pacl)
+{
+	if (dpacl) {
+		v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
+		posix_acl_release(dpacl);
+	}
+	if (pacl) {
+		v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
+		posix_acl_release(pacl);
+	}
+	return 0;
+}
+
+/*
+ * Compute the creation mode and the ACLs for a new object in @dir.
+ *
+ * @modep: in: requested mode; out: mode after applying either the
+ *         current umask (no default ACL on @dir) or the default ACL.
+ * @dpacl: set to the default ACL of @dir when the new object is a
+ *         directory; otherwise left untouched.
+ * @pacl:  set to the access ACL for the new object when
+ *         posix_acl_create_masq() returns a positive value; otherwise
+ *         left untouched.
+ *
+ * On success the references stored in @dpacl and @pacl belong to the
+ * caller.  On failure neither output is written.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+		  struct posix_acl **dpacl, struct posix_acl **pacl)
+{
+	int retval = 0;
+	mode_t mode = *modep;
+	struct posix_acl *acl = NULL;
+
+	if (!S_ISLNK(mode)) {
+		acl = v9fs_get_cached_acl(dir, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		if (!acl)
+			mode &= ~current_umask();
+	}
+	if (acl) {
+		struct posix_acl *clone;
+		int is_dir = S_ISDIR(mode);
+
+		clone = posix_acl_clone(acl, GFP_NOFS);
+		if (!clone) {
+			retval = -ENOMEM;
+			goto cleanup;
+		}
+
+		retval = posix_acl_create_masq(clone, &mode);
+		if (retval < 0) {
+			posix_acl_release(clone);
+			goto cleanup;
+		}
+		if (retval > 0)
+			*pacl = clone;
+		else
+			/* clone is not handed out: drop it instead of leaking */
+			posix_acl_release(clone);
+
+		if (is_dir) {
+			/*
+			 * Hand our reference over to the caller only once
+			 * nothing can fail any more, so *dpacl never points
+			 * at an ACL released by the cleanup path.
+			 */
+			*dpacl = acl;
+			acl = NULL;
+		}
+		retval = 0;
+	}
+	*modep = mode;
+cleanup:
+	/* releases the default ACL unless it was handed to the caller */
+	posix_acl_release(acl);
+	return retval;
+}
+
+/*
+ * Fetch the ACL xattr selected by @type through v9fs_xattr_get().
+ * @name is not used; the full attribute name is derived from @type.
+ */
+static int v9fs_remote_get_acl(struct dentry *dentry, const char *name,
+			       void *buffer, size_t size, int type)
+{
+	char *full_name;
+
+	if (type == ACL_TYPE_ACCESS)
+		full_name = POSIX_ACL_XATTR_ACCESS;
+	else if (type == ACL_TYPE_DEFAULT)
+		full_name = POSIX_ACL_XATTR_DEFAULT;
+	else
+		BUG();
+
+	return v9fs_xattr_get(dentry, full_name, buffer, size);
+}
+
+/*
+ * xattr handler ->get for the POSIX ACL attributes.
+ *
+ * @name must be the empty string, else -EINVAL is returned.  Without
+ * access=client the request is passed to v9fs_remote_get_acl(); with
+ * access=client the cached ACL is converted with posix_acl_to_xattr(),
+ * or -ENODATA is returned when none is cached.
+ */
+static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
+			      void *buffer, size_t size, int type)
+{
+	struct inode *inode = dentry->d_inode;
+	struct v9fs_session_info *v9ses;
+	struct posix_acl *cached;
+	int ret;
+
+	if (*name != '\0')
+		return -EINVAL;
+
+	v9ses = v9fs_inode2v9ses(inode);
+	/*
+	 * We allow set/get/list of acl when access=client is not specified
+	 */
+	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
+		return v9fs_remote_get_acl(dentry, name, buffer, size, type);
+
+	cached = v9fs_get_cached_acl(inode, type);
+	if (IS_ERR(cached))
+		return PTR_ERR(cached);
+	if (!cached)
+		return -ENODATA;
+
+	ret = posix_acl_to_xattr(cached, buffer, size);
+	posix_acl_release(cached);
+	return ret;
+}
+
+/*
+ * Store the ACL xattr selected by @type through v9fs_xattr_set().
+ * @name is not used; the full attribute name is derived from @type.
+ */
+static int v9fs_remote_set_acl(struct dentry *dentry, const char *name,
+			       const void *value, size_t size,
+			       int flags, int type)
+{
+	char *full_name;
+
+	if (type == ACL_TYPE_ACCESS)
+		full_name = POSIX_ACL_XATTR_ACCESS;
+	else if (type == ACL_TYPE_DEFAULT)
+		full_name = POSIX_ACL_XATTR_DEFAULT;
+	else
+		BUG();
+
+	return v9fs_xattr_set(dentry, full_name, value, size, flags);
+}
+
+
+/*
+ * xattr handler ->set for the POSIX ACL attributes.
+ *
+ * @name must be the empty string, else -EINVAL is returned.  Without
+ * access=client the raw value is passed to v9fs_remote_set_acl()
+ * unchecked.  With access=client the value is parsed and validated, the
+ * mode bits are updated for access ACLs, the xattr is sent with
+ * v9fs_xattr_set() and, on success, the cached ACL is replaced.
+ *
+ * Returns 0 on success or a negative errno (-EOPNOTSUPP for symlinks,
+ * -EPERM if the caller is neither owner nor capable, -EINVAL for a
+ * default ACL on a non-directory, or an error from the helpers).
+ */
+static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
+			      const void *value, size_t size,
+			      int flags, int type)
+{
+	int retval;
+	struct posix_acl *acl;
+	struct v9fs_session_info *v9ses;
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	/*
+	 * set the attribute on the remote. Without even looking at the
+	 * xattr value. We leave it to the server to validate
+	 */
+	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
+		return v9fs_remote_set_acl(dentry, name,
+					   value, size, flags, type);
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+	if (!is_owner_or_cap(inode))
+		return -EPERM;
+	if (value) {
+		/* update the cached acl value */
+		acl = posix_acl_from_xattr(value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		else if (acl) {
+			retval = posix_acl_valid(acl);
+			if (retval)
+				goto err_out;
+		}
+	} else
+		acl = NULL;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		if (acl) {
+			mode_t mode = inode->i_mode;
+			retval = posix_acl_equiv_mode(acl, &mode);
+			if (retval < 0)
+				goto err_out;
+			else {
+				struct iattr iattr;
+				if (retval == 0) {
+					/*
+					 * ACL can be represented
+					 * by the mode bits. So don't
+					 * update ACL.  Drop the parsed
+					 * ACL first: clearing the
+					 * pointer alone would leak it.
+					 */
+					posix_acl_release(acl);
+					acl = NULL;
+					value = NULL;
+					size = 0;
+				}
+				/* Update the mode bits */
+				iattr.ia_mode = ((mode & S_IALLUGO) |
+						 (inode->i_mode & ~S_IALLUGO));
+				iattr.ia_valid = ATTR_MODE;
+				/* FIXME should we update ctime ?
+				 * What if the following setxattr updates
+				 * the mode ?
+				 */
+				v9fs_vfs_setattr_dotl(dentry, &iattr);
+			}
+		}
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		if (!S_ISDIR(inode->i_mode)) {
+			retval = -EINVAL;
+			goto err_out;
+		}
+		break;
+	default:
+		BUG();
+	}
+	retval = v9fs_xattr_set(dentry, name, value, size, flags);
+	if (!retval)
+		set_cached_acl(inode, type, acl);
+err_out:
+	posix_acl_release(acl);
+	return retval;
+}
+
+/* xattr handler for the access ACL attribute */
+const struct xattr_handler v9fs_xattr_acl_access_handler = {
+	.get	= v9fs_xattr_get_acl,
+	.set	= v9fs_xattr_set_acl,
+	.flags	= ACL_TYPE_ACCESS,
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+};
+
+/* xattr handler for the default ACL attribute */
+const struct xattr_handler v9fs_xattr_acl_default_handler = {
+	.get	= v9fs_xattr_get_acl,
+	.set	= v9fs_xattr_set_acl,
+	.flags	= ACL_TYPE_DEFAULT,
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+};
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
new file mode 100644
index 000000000000..59e18c2e8c7e
--- /dev/null
+++ b/fs/9p/acl.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+#ifndef FS_9P_ACL_H
+#define FS_9P_ACL_H
+
+/*
+ * 9P POSIX ACL entry points.  With CONFIG_9P_FS_POSIX_ACL the functions
+ * are declared here; without it they collapse to stubs that return 0.
+ */
+#ifdef CONFIG_9P_FS_POSIX_ACL
+extern int v9fs_get_acl(struct inode *, struct p9_fid *);
+extern int v9fs_check_acl(struct inode *inode, int mask);
+extern int v9fs_acl_chmod(struct dentry *);
+extern int v9fs_set_create_acl(struct dentry *,
+ struct posix_acl *, struct posix_acl *);
+extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+ struct posix_acl **dpacl, struct posix_acl **pacl);
+#else
+/* No ACL support: v9fs_check_acl expands to a NULL pointer */
+#define v9fs_check_acl NULL
+static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
+{
+ return 0;
+}
+static inline int v9fs_acl_chmod(struct dentry *dentry)
+{
+ return 0;
+}
+static inline int v9fs_set_create_acl(struct dentry *dentry,
+ struct posix_acl *dpacl,
+ struct posix_acl *pacl)
+{
+ return 0;
+}
+/* Stub: leaves *modep, *dpacl and *pacl untouched */
+static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+ struct posix_acl **dpacl,
+ struct posix_acl **pacl)
+{
+ return 0;
+}
+
+#endif
+#endif /* FS_9P_ACL_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 6406f896bf95..b00223c99d70 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -149,6 +149,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
switch (access) {
case V9FS_ACCESS_SINGLE:
case V9FS_ACCESS_USER:
+ case V9FS_ACCESS_CLIENT:
uid = current_fsuid();
any = 0;
break;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 38dc0e067599..2f77cd33ba83 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -193,7 +193,17 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
v9ses->flags |= V9FS_ACCESS_USER;
else if (strcmp(s, "any") == 0)
v9ses->flags |= V9FS_ACCESS_ANY;
- else {
+ else if (strcmp(s, "client") == 0) {
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ v9ses->flags |= V9FS_ACCESS_CLIENT;
+#else
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "access=client option not supported\n");
+ kfree(s);
+ ret = -EINVAL;
+ goto free_and_return;
+#endif
+ } else {
v9ses->flags |= V9FS_ACCESS_SINGLE;
v9ses->uid = simple_strtoul(s, &e, 10);
if (*e != '\0')
@@ -278,6 +288,16 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
+ if (!v9fs_proto_dotl(v9ses) &&
+ ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
+ /*
+ * We support ACCESS_CLIENT only for dotl.
+ * Fall back to ACCESS_USER
+ */
+ v9ses->flags &= ~V9FS_ACCESS_MASK;
+ v9ses->flags |= V9FS_ACCESS_USER;
+ }
+ /*FIXME !! */
/* for legacy mode, fall back to V9FS_ACCESS_ANY */
if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 4c963c9fc41f..cb6396855e2d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -33,13 +33,17 @@
*
* Session flags reflect options selected by users at mount time
*/
+#define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \
+ V9FS_ACCESS_USER | \
+ V9FS_ACCESS_CLIENT)
+#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
+
enum p9_session_flags {
V9FS_PROTO_2000U = 0x01,
V9FS_PROTO_2000L = 0x02,
V9FS_ACCESS_SINGLE = 0x04,
V9FS_ACCESS_USER = 0x08,
- V9FS_ACCESS_ANY = 0x0C,
- V9FS_ACCESS_MASK = 0x0C,
+ V9FS_ACCESS_CLIENT = 0x10
};
/* possible values of ->cache */
@@ -113,8 +117,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses);
void v9fs_session_cancel(struct v9fs_session_info *v9ses);
void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
-#define V9FS_MAGIC 0x01021997
-
/* other default globals */
#define V9FS_PORT 564
#define V9FS_DEFUSER "nobody"
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 88418c419ea7..bab0eac873f4 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -64,3 +64,7 @@ int v9fs_uflags2omode(int uflags, int extended);
ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
+int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
+int v9fs_file_fsync_dotl(struct file *filp, int datasync);
+
+#define P9_LOCK_TIMEOUT (30*HZ)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 90e38449f4b3..b7f2a8e3863e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -154,10 +154,40 @@ static int v9fs_launder_page(struct page *page)
return 0;
}
+/**
+ * v9fs_direct_IO - 9P address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of v9fs_direct_IO() in the address space ops vector
+ * allows open() with the O_DIRECT flag, which would have failed otherwise.
+ *
+ * In the non-cached mode, we shunt off direct read and write requests before
+ * the VFS gets them, so this method should never be called.
+ *
+ * Direct IO is not 'yet' supported in the cached mode. Hence when
+ * this routine is called through generic_file_aio_read(), the read/write fails
+ * with an error.
+ *
+ * Always returns -EINVAL after emitting a P9_DEBUG_VFS message.
+ */
+ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t pos, unsigned long nr_segs)
+{
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
+ "off/no(%lld/%lu) EINVAL\n",
+ iocb->ki_filp->f_path.dentry->d_name.name,
+ (long long) pos, nr_segs);
+
+ return -EINVAL;
+}
const struct address_space_operations v9fs_addr_operations = {
.readpage = v9fs_vfs_readpage,
.readpages = v9fs_vfs_readpages,
.releasepage = v9fs_release_page,
.invalidatepage = v9fs_invalidate_page,
.launder_page = v9fs_launder_page,
+ .direct_IO = v9fs_direct_IO,
};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 899f168fd19c..b84ebe8cefed 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -242,7 +242,8 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
while (rdir->head < rdir->tail) {
err = p9dirent_read(rdir->buf + rdir->head,
- buflen - rdir->head, &curdirent,
+ rdir->tail - rdir->head,
+ &curdirent,
fid->clnt->proto_version);
if (err < 0) {
P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
@@ -314,4 +315,5 @@ const struct file_operations v9fs_dir_operations_dotl = {
.readdir = v9fs_dir_readdir_dotl,
.open = v9fs_file_open,
.release = v9fs_dir_release,
+ .fsync = v9fs_file_fsync_dotl,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index e97c92bd6f16..240c30674396 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -33,6 +33,7 @@
#include <linux/inet.h>
#include <linux/list.h>
#include <linux/pagemap.h>
+#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <linux/idr.h>
#include <net/9p/9p.h>
@@ -44,6 +45,7 @@
#include "cache.h"
static const struct file_operations v9fs_cached_file_operations;
+static const struct file_operations v9fs_cached_file_operations_dotl;
/**
* v9fs_file_open - open a file (or directory)
@@ -92,6 +94,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
/* enable cached file options */
if(file->f_op == &v9fs_file_operations)
file->f_op = &v9fs_cached_file_operations;
+ else if (file->f_op == &v9fs_file_operations_dotl)
+ file->f_op = &v9fs_cached_file_operations_dotl;
#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_set_cookie(inode, file);
@@ -130,6 +134,206 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
return res;
}
+/*
+ * Take or release a POSIX lock: first locally through
+ * posix_lock_file_wait(), then on the server through
+ * p9_client_lock_dotl().  If the server step fails for a lock (not an
+ * unlock) request, the local lock is rolled back.
+ *
+ * Returns 0 on success, the error from posix_lock_file_wait() or
+ * p9_client_lock_dotl(), -EAGAIN when the server reports the lock as
+ * blocked, and -ENOLCK for any other server status.
+ */
+static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct p9_flock flock;
+	struct p9_fid *fid;
+	/* defined value even if the request fails before status is written */
+	uint8_t status = P9_LOCK_ERROR;
+	int res = 0;
+	unsigned char fl_type;
+
+	fid = filp->private_data;
+	BUG_ON(fid == NULL);
+
+	if ((fl->fl_flags & FL_POSIX) != FL_POSIX)
+		BUG();
+
+	res = posix_lock_file_wait(filp, fl);
+	if (res < 0)
+		goto out;
+
+	/* convert posix lock to p9 tlock args */
+	memset(&flock, 0, sizeof(flock));
+	flock.type = fl->fl_type;
+	flock.start = fl->fl_start;
+	if (fl->fl_end == OFFSET_MAX)
+		flock.length = 0;
+	else
+		flock.length = fl->fl_end - fl->fl_start + 1;
+	flock.proc_id = fl->fl_pid;
+	flock.client_id = utsname()->nodename;
+	if (IS_SETLKW(cmd))
+		flock.flags = P9_LOCK_FLAGS_BLOCK;
+
+	/*
+	 * if it's a blocking request and we get P9_LOCK_BLOCKED as the
+	 * status for the lock request, keep on trying
+	 *
+	 * NOTE(review): the return value of the interruptible sleep is
+	 * ignored, so a pending signal does not end the retry loop --
+	 * confirm whether that is intended.
+	 */
+	for (;;) {
+		res = p9_client_lock_dotl(fid, &flock, &status);
+		if (res < 0)
+			break;
+
+		if (status != P9_LOCK_BLOCKED || !IS_SETLKW(cmd))
+			break;
+		schedule_timeout_interruptible(P9_LOCK_TIMEOUT);
+	}
+
+	/*
+	 * map 9p status to VFS status; a transport error in res is kept
+	 * as is, status is only meaningful when the request succeeded
+	 */
+	if (res >= 0) {
+		switch (status) {
+		case P9_LOCK_SUCCESS:
+			res = 0;
+			break;
+		case P9_LOCK_BLOCKED:
+			res = -EAGAIN;
+			break;
+		case P9_LOCK_ERROR:
+		case P9_LOCK_GRACE:
+			res = -ENOLCK;
+			break;
+		default:
+			/* server-supplied value: fail the lock, don't BUG() */
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				   "unknown lock status %d\n", status);
+			res = -ENOLCK;
+			break;
+		}
+	}
+
+	/*
+	 * in case the server returned an error for the lock request,
+	 * revert it locally.  The result of the rollback is deliberately
+	 * not stored in res: the caller must see the original failure.
+	 */
+	if (res < 0 && fl->fl_type != F_UNLCK) {
+		fl_type = fl->fl_type;
+		fl->fl_type = F_UNLCK;
+		posix_lock_file_wait(filp, fl);
+		fl->fl_type = fl_type;
+	}
+out:
+	return res;
+}
+
+/*
+ * Test for a conflicting lock: locally first via posix_test_lock(), and
+ * only if none is found there, on the server via
+ * p9_client_getlock_dotl().  @fl is updated to describe the conflicting
+ * lock, or its type is set to F_UNLCK when there is none.
+ *
+ * Returns 0 on success or the error from p9_client_getlock_dotl().
+ */
+static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
+{
+	struct p9_fid *fid = filp->private_data;
+	struct p9_getlock glock;
+	int err;
+
+	BUG_ON(fid == NULL);
+
+	posix_test_lock(filp, fl);
+	/* a local conflict is final; the server need not be asked */
+	if (fl->fl_type != F_UNLCK)
+		return 0;
+
+	/* translate the posix lock into p9 tgetlock arguments */
+	memset(&glock, 0, sizeof(glock));
+	glock.type = fl->fl_type;
+	glock.start = fl->fl_start;
+	/* a length of 0 stands for a range ending at OFFSET_MAX */
+	glock.length = (fl->fl_end == OFFSET_MAX) ?
+			0 : fl->fl_end - fl->fl_start + 1;
+	glock.proc_id = fl->fl_pid;
+	glock.client_id = utsname()->nodename;
+
+	err = p9_client_getlock_dotl(fid, &glock);
+	if (err < 0)
+		return err;
+
+	if (glock.type == F_UNLCK) {
+		fl->fl_type = F_UNLCK;
+		return err;
+	}
+
+	/* copy the conflicting lock reported by the server back into fl */
+	fl->fl_type = glock.type;
+	fl->fl_start = glock.start;
+	if (glock.length == 0)
+		fl->fl_end = OFFSET_MAX;
+	else
+		fl->fl_end = glock.start + glock.length - 1;
+	fl->fl_pid = glock.proc_id;
+
+	return err;
+}
+
+/**
+ * v9fs_file_lock_dotl - lock a file (or directory)
+ * @filp: file to be locked
+ * @cmd: lock command
+ * @fl: file lock structure
+ *
+ * Set/unlock commands go to v9fs_file_do_lock(), get commands to
+ * v9fs_file_getlock().  Returns -ENOLCK for a lock request on a
+ * mandatory-lock inode and -EINVAL for any other command.
+ */
+
+static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int setting = IS_SETLK(cmd) || IS_SETLKW(cmd);
+
+	P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
+		   cmd, fl, filp->f_path.dentry->d_name.name);
+
+	/* No mandatory locks */
+	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
+		return -ENOLCK;
+
+	if (setting) {
+		/* write back and drop cached pages before taking a lock */
+		if (fl->fl_type != F_UNLCK) {
+			filemap_write_and_wait(inode->i_mapping);
+			invalidate_mapping_pages(&inode->i_data, 0, -1);
+		}
+		return v9fs_file_do_lock(filp, cmd, fl);
+	}
+
+	if (IS_GETLK(cmd))
+		return v9fs_file_getlock(filp, fl);
+
+	return -EINVAL;
+}
+
+/**
+ * v9fs_file_flock_dotl - lock a file
+ * @filp: file to be locked
+ * @cmd: lock command
+ * @fl: file lock structure
+ *
+ * The flock request is rewritten in place as a whole-file POSIX lock
+ * owned by @filp and handed to v9fs_file_do_lock().  Returns -ENOLCK
+ * for a lock request on a mandatory-lock inode or when @fl is not an
+ * FL_FLOCK lock, and -EINVAL when @cmd is not a set-lock command.
+ */
+
+static int v9fs_file_flock_dotl(struct file *filp, int cmd,
+				struct file_lock *fl)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int setting = IS_SETLK(cmd) || IS_SETLKW(cmd);
+	int ret = -ENOLCK;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
+		   cmd, fl, filp->f_path.dentry->d_name.name);
+
+	/* No mandatory locks */
+	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
+		goto out_err;
+
+	if (!(fl->fl_flags & FL_FLOCK))
+		goto out_err;
+
+	if (setting && fl->fl_type != F_UNLCK) {
+		filemap_write_and_wait(inode->i_mapping);
+		invalidate_mapping_pages(&inode->i_data, 0, -1);
+	}
+	/* Convert flock to posix lock */
+	fl->fl_owner = (fl_owner_t)filp;
+	fl->fl_start = 0;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_flags |= FL_POSIX;
+	/* FL_FLOCK is known to be set here; clear it explicitly */
+	fl->fl_flags &= ~FL_FLOCK;
+
+	if (setting)
+		ret = v9fs_file_do_lock(filp, cmd, fl);
+	else
+		ret = -EINVAL;
+out_err:
+	return ret;
+}
+
/**
* v9fs_file_readn - read from a file
* @filp: file pointer to read
@@ -219,7 +423,9 @@ static ssize_t
v9fs_file_write(struct file *filp, const char __user * data,
size_t count, loff_t * offset)
{
- int n, rsize, total = 0;
+ ssize_t retval;
+ size_t total = 0;
+ int n;
struct p9_fid *fid;
struct p9_client *clnt;
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -232,14 +438,19 @@ v9fs_file_write(struct file *filp, const char __user * data,
fid = filp->private_data;
clnt = fid->clnt;
- rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ;
+ retval = generic_write_checks(filp, &origin, &count, 0);
+ if (retval)
+ goto out;
- do {
- if (count < rsize)
- rsize = count;
+ retval = -EINVAL;
+ if ((ssize_t) count < 0)
+ goto out;
+ retval = 0;
+ if (!count)
+ goto out;
- n = p9_client_write(fid, NULL, data+total, origin+total,
- rsize);
+ do {
+ n = p9_client_write(fid, NULL, data+total, origin+total, count);
if (n <= 0)
break;
count -= n;
@@ -258,9 +469,11 @@ v9fs_file_write(struct file *filp, const char __user * data,
}
if (n < 0)
- return n;
-
- return total;
+ retval = n;
+ else
+ retval = total;
+out:
+ return retval;
}
static int v9fs_file_fsync(struct file *filp, int datasync)
@@ -278,6 +491,20 @@ static int v9fs_file_fsync(struct file *filp, int datasync)
return retval;
}
+/*
+ * v9fs_file_fsync_dotl - forward an fsync request to the 9P client
+ * @filp: file whose private_data holds the fid to sync
+ * @datasync: passed through unchanged to p9_client_fsync()
+ *
+ * Returns the value produced by p9_client_fsync().
+ */
+int v9fs_file_fsync_dotl(struct file *filp, int datasync)
+{
+	struct p9_fid *fid = filp->private_data;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
+			filp, datasync);
+
+	return p9_client_fsync(fid, datasync);
+}
+
static const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -290,6 +517,19 @@ static const struct file_operations v9fs_cached_file_operations = {
.fsync = v9fs_file_fsync,
};
+/*
+ * File operations table that pairs the generic read path
+ * (do_sync_read / generic_file_aio_read) with the _dotl lock, flock and
+ * fsync handlers; write, open and release use the shared v9fs handlers.
+ * NOTE(review): presumably selected for 9P2000.L mounts with caching
+ * enabled -- confirm where this table is assigned to an inode.
+ */
+static const struct file_operations v9fs_cached_file_operations_dotl = {
+ .llseek = generic_file_llseek,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = v9fs_file_write,
+ .open = v9fs_file_open,
+ .release = v9fs_dir_release,
+ .lock = v9fs_file_lock_dotl,
+ .flock = v9fs_file_flock_dotl,
+ .mmap = generic_file_readonly_mmap,
+ .fsync = v9fs_file_fsync_dotl,
+};
+
const struct file_operations v9fs_file_operations = {
.llseek = generic_file_llseek,
.read = v9fs_file_read,
@@ -307,7 +547,8 @@ const struct file_operations v9fs_file_operations_dotl = {
.write = v9fs_file_write,
.open = v9fs_file_open,
.release = v9fs_dir_release,
- .lock = v9fs_file_lock,
+ .lock = v9fs_file_lock_dotl,
+ .flock = v9fs_file_flock_dotl,
.mmap = generic_file_readonly_mmap,
- .fsync = v9fs_file_fsync,
+ .fsync = v9fs_file_fsync_dotl,
};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index ef5905f7c8a3..34bf71b56542 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -36,6 +36,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/xattr.h>
+#include <linux/posix_acl.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -44,6 +45,7 @@
#include "fid.h"
#include "cache.h"
#include "xattr.h"
+#include "acl.h"
static const struct inode_operations v9fs_dir_inode_operations;
static const struct inode_operations v9fs_dir_inode_operations_dotu;
@@ -53,6 +55,10 @@ static const struct inode_operations v9fs_file_inode_operations_dotl;
static const struct inode_operations v9fs_symlink_inode_operations;
static const struct inode_operations v9fs_symlink_inode_operations_dotl;
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+ dev_t rdev);
+
/**
* unixmode2p9mode - convert unix mode bits to plan 9
* @v9ses: v9fs session information
@@ -500,6 +506,11 @@ v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
v9fs_vcookie_set_qid(ret, &st->qid);
v9fs_cache_inode_get_cookie(ret);
#endif
+ err = v9fs_get_acl(ret, fid);
+ if (err) {
+ iput(ret);
+ goto error;
+ }
kfree(st);
return ret;
error:
@@ -553,13 +564,6 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
return retval;
}
-static int
-v9fs_open_created(struct inode *inode, struct file *file)
-{
- return 0;
-}
-
-
/**
* v9fs_create - Create a file
* @v9ses: session information
@@ -655,29 +659,37 @@ error:
*/
static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
struct nameidata *nd)
{
int err = 0;
char *name = NULL;
gid_t gid;
int flags;
+ mode_t mode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL;
struct p9_fid *dfid, *ofid;
struct file *filp;
struct p9_qid qid;
struct inode *inode;
+ struct posix_acl *pacl = NULL, *dacl = NULL;
v9ses = v9fs_inode2v9ses(dir);
if (nd && nd->flags & LOOKUP_OPEN)
flags = nd->intent.open.flags - 1;
- else
- flags = O_RDWR;
+ else {
+ /*
+ * create call without LOOKUP_OPEN is due
+ * to mknod of regular files. So use mknod
+ * operation.
+ */
+ return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+ }
name = (char *) dentry->d_name.name;
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
- "mode:0x%x\n", name, flags, mode);
+ "mode:0x%x\n", name, flags, omode);
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid)) {
@@ -695,6 +707,15 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
}
gid = v9fs_get_fsgid_for_create(dir);
+
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in creat %d\n", err);
+ goto error;
+ }
err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
if (err < 0) {
P9_DPRINTK(P9_DEBUG_VFS,
@@ -702,46 +723,52 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
err);
goto error;
}
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE ||
+ (nd && nd->flags & LOOKUP_OPEN)) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
- /* No need to populate the inode if we are not opening the file AND
- * not in cached mode.
- */
- if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) {
- /* Not in cached mode. No need to populate inode with stat */
- dentry->d_op = &v9fs_dentry_operations;
- p9_client_clunk(ofid);
- d_instantiate(dentry, NULL);
- return 0;
- }
-
- /* Now walk from the parent so we can get an unopened fid. */
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
- fid = NULL;
- goto error;
- }
-
- /* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
- goto error;
- }
- if (v9ses->cache)
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
dentry->d_op = &v9fs_cached_dentry_operations;
- else
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ /* The fid would get clunked via a dput */
+ fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate
+ * inode with stat. We need to get an inode
+ * so that we can set the acl with dentry
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
dentry->d_op = &v9fs_dentry_operations;
- d_instantiate(dentry, inode);
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
+ d_instantiate(dentry, inode);
+ }
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
/* if we are opening a file, assign the open fid to the file */
if (nd && nd->flags & LOOKUP_OPEN) {
- filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+ filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
p9_client_clunk(ofid);
return PTR_ERR(filp);
@@ -800,7 +827,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
/* if we are opening a file, assign the open fid to the file */
if (nd && nd->flags & LOOKUP_OPEN) {
- filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+ filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
goto error;
@@ -859,23 +886,28 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
*
*/
-static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
- int mode)
+static int v9fs_vfs_mkdir_dotl(struct inode *dir,
+ struct dentry *dentry, int omode)
{
int err;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
gid_t gid;
char *name;
+ mode_t mode;
struct inode *inode;
struct p9_qid qid;
struct dentry *dir_dentry;
+ struct posix_acl *dacl = NULL, *pacl = NULL;
P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
err = 0;
v9ses = v9fs_inode2v9ses(dir);
- mode |= S_IFDIR;
+ omode |= S_IFDIR;
+ if (dir->i_mode & S_ISGID)
+ omode |= S_ISGID;
+
dir_dentry = v9fs_dentry_from_dir_inode(dir);
dfid = v9fs_fid_lookup(dir_dentry);
if (IS_ERR(dfid)) {
@@ -886,11 +918,14 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
}
gid = v9fs_get_fsgid_for_create(dir);
- if (gid < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in mkdir %d\n", err);
goto error;
}
-
name = (char *) dentry->d_name.name;
err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
if (err < 0)
@@ -920,7 +955,23 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
if (err < 0)
goto error;
fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate
+ * inode with stat. We need to get an inode
+ * so that we can set the acl with dentry
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ dentry->d_op = &v9fs_dentry_operations;
+ d_instantiate(dentry, inode);
}
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
+
error:
if (fid)
p9_client_clunk(fid);
@@ -979,7 +1030,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
result = v9fs_fid_add(dentry, fid);
if (result < 0)
- goto error;
+ goto error_iput;
inst_out:
if (v9ses->cache)
@@ -990,6 +1041,8 @@ inst_out:
d_add(dentry, inode);
return NULL;
+error_iput:
+ iput(inode);
error:
p9_client_clunk(fid);
@@ -1237,7 +1290,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
*
*/
-static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
{
int retval;
struct v9fs_session_info *v9ses;
@@ -1279,6 +1332,12 @@ static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
setattr_copy(dentry->d_inode, iattr);
mark_inode_dirty(dentry->d_inode);
+ if (iattr->ia_valid & ATTR_MODE) {
+ /* We also want to update ACL when we update mode bits */
+ retval = v9fs_acl_chmod(dentry);
+ if (retval < 0)
+ return retval;
+ }
return 0;
}
@@ -1473,7 +1532,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
if (IS_ERR(fid))
return PTR_ERR(fid);
- if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))
+ if (!v9fs_proto_dotu(v9ses))
return -EBADF;
st = p9_client_stat(fid);
@@ -1616,11 +1675,6 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
gid = v9fs_get_fsgid_for_create(dir);
- if (gid < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
- goto error;
- }
-
/* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
@@ -1855,21 +1909,23 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
*
*/
static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
dev_t rdev)
{
int err;
char *name;
+ mode_t mode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
struct inode *inode;
gid_t gid;
struct p9_qid qid;
struct dentry *dir_dentry;
+ struct posix_acl *dacl = NULL, *pacl = NULL;
P9_DPRINTK(P9_DEBUG_VFS,
" %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
- dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+ dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -1885,11 +1941,14 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
}
gid = v9fs_get_fsgid_for_create(dir);
- if (gid < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+ mode = omode;
+ /* Update mode based on ACL value */
+ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+ if (err) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "Failed to get acl values in mknod %d\n", err);
goto error;
}
-
name = (char *) dentry->d_name.name;
err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
@@ -1933,13 +1992,68 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
dentry->d_op = &v9fs_dentry_operations;
d_instantiate(dentry, inode);
}
-
+ /* Now set the ACL based on the default value */
+ v9fs_set_create_acl(dentry, dacl, pacl);
error:
if (fid)
p9_client_clunk(fid);
return err;
}
+/*
+ * v9fs_vfs_readlink_dotl - read a symlink target into a buffer
+ * @dentry: dentry for symlink
+ * @buffer: destination for the link target
+ * @buflen: size of @buffer in bytes
+ *
+ * Returns the number of bytes of the target now in @buffer (at most
+ * @buflen), or a negative errno from the fid lookup or the readlink
+ * request. @buffer is NUL-terminated only when the target is shorter
+ * than @buflen; callers that need a string must terminate it themselves.
+ *
+ * NOTE(review): @buffer is written with strncpy(), i.e. as a kernel
+ * pointer, yet this function is also installed as the .readlink inode
+ * operation -- confirm that callers never pass a userspace pointer.
+ */
+static int
+v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen)
+{
+	int retval;
+	struct p9_fid *fid;
+	char *target = NULL;
+
+	P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid))
+		return PTR_ERR(fid);
+
+	retval = p9_client_readlink(fid, &target);
+	if (retval < 0)
+		return retval;
+
+	strncpy(buffer, target, buflen);
+	/*
+	 * Log the source string: buffer is left unterminated by strncpy()
+	 * when the target does not fit, so it is not safe to pass to %s.
+	 */
+	P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, target);
+
+	retval = strnlen(buffer, buflen);
+	/*
+	 * The target string is handed to us by p9_client_readlink() and is
+	 * not referenced after this point; release it so it is not leaked.
+	 * NOTE(review): assumes the client allocates it with kmalloc --
+	 * confirm against net/9p/client.c.
+	 */
+	kfree(target);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_follow_link_dotl - follow a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ * Obtains a name buffer with __getname(), fills it through
+ * v9fs_vfs_readlink_dotl() and hands the result to nd_set_link(). On
+ * allocation or readlink failure the buffer is released and an ERR_PTR
+ * is stored in @nd instead. Always returns NULL.
+ */
+
+static void *
+v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
+{
+	int len = 0;
+	char *link = __getname();
+
+	P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
+
+	if (!link)
+		link = ERR_PTR(-ENOMEM);
+	else {
+		len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX);
+		if (len < 0) {
+			__putname(link);
+			link = ERR_PTR(len);
+		} else
+			/* readlink may fill the buffer without a NUL; add one */
+			link[min(len, PATH_MAX-1)] = 0;
+	}
+	nd_set_link(nd, link);
+
+	return NULL;
+}
+
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
@@ -1970,7 +2084,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = v9fs_listxattr,
-
+ .check_acl = v9fs_check_acl,
};
static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1997,6 +2111,7 @@ static const struct inode_operations v9fs_file_inode_operations_dotl = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = v9fs_listxattr,
+ .check_acl = v9fs_check_acl,
};
static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -2008,8 +2123,8 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
};
static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
- .readlink = generic_readlink,
- .follow_link = v9fs_vfs_follow_link,
+ .readlink = v9fs_vfs_readlink_dotl,
+ .follow_link = v9fs_vfs_follow_link_dotl,
.put_link = v9fs_vfs_put_link,
.getattr = v9fs_vfs_getattr_dotl,
.setattr = v9fs_vfs_setattr_dotl,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 1d12ba0ed3db..c55c614500ad 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -39,6 +39,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/statfs.h>
+#include <linux/magic.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -46,6 +47,7 @@
#include "v9fs_vfs.h"
#include "fid.h"
#include "xattr.h"
+#include "acl.h"
static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
@@ -66,7 +68,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
* v9fs_fill_super - populate superblock with info
* @sb: superblock
* @v9ses: session information
- * @flags: flags propagated from v9fs_get_sb()
+ * @flags: flags propagated from v9fs_mount()
*
*/
@@ -88,22 +90,25 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
MS_NOATIME;
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)
+ sb->s_flags |= MS_POSIXACL;
+#endif
+
save_mount_options(sb, data);
}
/**
- * v9fs_get_sb - mount a superblock
+ * v9fs_mount - mount a superblock
* @fs_type: file system type
* @flags: mount flags
* @dev_name: device name that was mounted
* @data: mount options
- * @mnt: mountpoint record to be instantiated
*
*/
-static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct super_block *sb = NULL;
struct inode *inode = NULL;
@@ -117,7 +122,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
if (!v9ses)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
fid = v9fs_session_init(v9ses, dev_name, data);
if (IS_ERR(fid)) {
@@ -149,7 +154,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
goto release_sb;
}
sb->s_root = root;
-
if (v9fs_proto_dotl(v9ses)) {
struct p9_stat_dotl *st = NULL;
st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
@@ -174,19 +178,21 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
p9stat_free(st);
kfree(st);
}
-
+ retval = v9fs_get_acl(inode, fid);
+ if (retval)
+ goto release_sb;
v9fs_fid_add(root, fid);
P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
clunk_fid:
p9_client_clunk(fid);
close_session:
v9fs_session_close(v9ses);
kfree(v9ses);
- return retval;
+ return ERR_PTR(retval);
+
release_sb:
/*
* we will do the session_close and root dentry release
@@ -196,7 +202,7 @@ release_sb:
*/
p9_client_clunk(fid);
deactivate_locked_super(sb);
- return retval;
+ return ERR_PTR(retval);
}
/**
@@ -249,7 +255,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (v9fs_proto_dotl(v9ses)) {
res = p9_client_statfs(fid, &rs);
if (res == 0) {
- buf->f_type = rs.type;
+ buf->f_type = V9FS_MAGIC;
buf->f_bsize = rs.bsize;
buf->f_blocks = rs.blocks;
buf->f_bfree = rs.bfree;
@@ -292,7 +298,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
struct file_system_type v9fs_fs_type = {
.name = "9p",
- .get_sb = v9fs_get_sb,
+ .mount = v9fs_mount,
.kill_sb = v9fs_kill_super,
.owner = THIS_MODULE,
.fs_flags = FS_RENAME_DOES_D_MOVE,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f88e5c2dc873..43ec7df84336 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -21,30 +21,13 @@
#include "fid.h"
#include "xattr.h"
-/*
- * v9fs_xattr_get()
- *
- * Copy an extended attribute into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
-ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
- void *buffer, size_t buffer_size)
+ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
+ void *buffer, size_t buffer_size)
{
ssize_t retval;
int msize, read_count;
u64 offset = 0, attr_size;
- struct p9_fid *fid, *attr_fid;
-
- P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
- __func__, name, buffer_size);
-
- fid = v9fs_fid_lookup(dentry);
- if (IS_ERR(fid))
- return PTR_ERR(fid);
+ struct p9_fid *attr_fid;
attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
if (IS_ERR(attr_fid)) {
@@ -88,6 +71,31 @@ error:
}
+
+/*
+ * v9fs_xattr_get()
+ *
+ * Dentry-based front end for v9fs_fid_xattr_get(): resolve the fid
+ * attached to the dentry and fetch the named extended attribute
+ * through it. The attribute is copied into the supplied buffer; pass a
+ * NULL buffer to learn the size that would be required.
+ *
+ * Returns a negative error number on failure (including a failed fid
+ * lookup), or the number of bytes used / required on success.
+ */
+ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t buffer_size)
+{
+	struct p9_fid *fid;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
+			__func__, name, buffer_size);
+	fid = v9fs_fid_lookup(dentry);
+	if (!IS_ERR(fid))
+		return v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+
+	return PTR_ERR(fid);
+}
+
/*
* v9fs_xattr_set()
*
@@ -156,5 +164,9 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
const struct xattr_handler *v9fs_xattr_handlers[] = {
&v9fs_xattr_user_handler,
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ &v9fs_xattr_acl_access_handler,
+ &v9fs_xattr_acl_default_handler,
+#endif
NULL
};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index 9ddf672ae5c4..eaa837c53bd5 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -15,10 +15,16 @@
#define FS_9P_XATTR_H
#include <linux/xattr.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
extern const struct xattr_handler *v9fs_xattr_handlers[];
extern struct xattr_handler v9fs_xattr_user_handler;
+extern const struct xattr_handler v9fs_xattr_acl_access_handler;
+extern const struct xattr_handler v9fs_xattr_acl_default_handler;
+extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *,
+ void *, size_t);
extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
void *, size_t);
extern int v9fs_xattr_set(struct dentry *, const char *,
diff --git a/fs/Kconfig b/fs/Kconfig
index b5e582bd769d..771f457402d4 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -53,7 +53,6 @@ config EXPORTFS
config FILE_LOCKING
bool "Enable POSIX file locking API" if EMBEDDED
default y
- select BKL # while lockd still uses it.
help
This option enables standard file locking support, required
for filesystems like NFS and for the flock() system
@@ -63,7 +62,6 @@ source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
-source "fs/autofs/Kconfig"
source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
@@ -235,7 +233,6 @@ config NFS_COMMON
default y
source "net/sunrpc/Kconfig"
-source "fs/smbfs/Kconfig"
source "fs/ceph/Kconfig"
source "fs/cifs/Kconfig"
source "fs/ncpfs/Kconfig"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index bb4cc5b8abc8..79e2ca7973b7 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC
config CORE_DUMP_DEFAULT_ELF_HEADERS
bool "Write ELF core dumps with partial segments"
- default n
+ default y
depends on BINFMT_ELF && ELF_CORE
help
ELF core dump files describe each memory mapping of the crashed
@@ -60,7 +60,7 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS
inherited. See Documentation/filesystems/proc.txt for details.
This config option changes the default setting of coredump_filter
- seen at boot time. If unsure, say N.
+ seen at boot time. If unsure, say Y.
config BINFMT_FLAT
bool "Kernel support for flat binaries"
diff --git a/fs/Makefile b/fs/Makefile
index 26956fcec917..a7f7cef0c0c8 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -88,7 +88,6 @@ obj-$(CONFIG_NFSD) += nfsd/
obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
obj-$(CONFIG_SYSV_FS) += sysv/
-obj-$(CONFIG_SMB_FS) += smbfs/
obj-$(CONFIG_CIFS) += cifs/
obj-$(CONFIG_NCP_FS) += ncpfs/
obj-$(CONFIG_HPFS_FS) += hpfs/
@@ -101,7 +100,6 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
obj-$(CONFIG_AFFS_FS) += affs/
obj-$(CONFIG_ROMFS_FS) += romfs/
obj-$(CONFIG_QNX4FS_FS) += qnx4/
-obj-$(CONFIG_AUTOFS_FS) += autofs/
obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d9803f73236f..959dbff2d42d 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -490,17 +490,16 @@ error:
return -EINVAL;
}
-static int adfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *adfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
}
static struct file_system_type adfs_fs_type = {
.owner = THIS_MODULE,
.name = "adfs",
- .get_sb = adfs_get_sb,
+ .mount = adfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/affs/super.c b/fs/affs/super.c
index fa4fbe1e238a..0cf7f4384cbd 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -573,17 +573,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static int affs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *affs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
}
static struct file_system_type affs_fs_type = {
.owner = THIS_MODULE,
.name = "affs",
- .get_sb = affs_get_sb,
+ .mount = affs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eacf76d98ae0..27201cffece4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -29,9 +29,8 @@
#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
static void afs_i_init_once(void *foo);
-static int afs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data, struct vfsmount *mnt);
+static struct dentry *afs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data);
static struct inode *afs_alloc_inode(struct super_block *sb);
static void afs_put_super(struct super_block *sb);
static void afs_destroy_inode(struct inode *inode);
@@ -40,7 +39,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
struct file_system_type afs_fs_type = {
.owner = THIS_MODULE,
.name = "afs",
- .get_sb = afs_get_sb,
+ .mount = afs_mount,
.kill_sb = kill_anon_super,
.fs_flags = 0,
};
@@ -359,11 +358,8 @@ error:
/*
* get an AFS superblock
*/
-static int afs_get_sb(struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *options,
- struct vfsmount *mnt)
+static struct dentry *afs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *options)
{
struct afs_mount_params params;
struct super_block *sb;
@@ -427,12 +423,11 @@ static int afs_get_sb(struct file_system_type *fs_type,
ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
}
- simple_set_mnt(mnt, sb);
afs_put_volume(params.volume);
afs_put_cell(params.cell);
kfree(new_opts);
_leave(" = 0 [%p]", sb);
- return 0;
+ return dget(sb->s_root);
error:
afs_put_volume(params.volume);
@@ -440,7 +435,7 @@ error:
key_put(params.key);
kfree(new_opts);
_leave(" = %d", ret);
- return ret;
+ return ERR_PTR(ret);
}
/*
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 5365527ca43f..57ce55b2564c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly;
static struct inode *anon_inode_inode;
static const struct file_operations anon_inode_fops;
-static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC,
- mnt);
+ return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC);
}
/*
@@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
static struct file_system_type anon_inode_fs_type = {
.name = "anon_inodefs",
- .get_sb = anon_inodefs_get_sb,
+ .mount = anon_inodefs_mount,
.kill_sb = kill_anon_super,
};
static const struct dentry_operations anon_inodefs_dentry_operations = {
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig
deleted file mode 100644
index 480e210c83ab..000000000000
--- a/fs/autofs/Kconfig
+++ /dev/null
@@ -1,22 +0,0 @@
-config AUTOFS_FS
- tristate "Kernel automounter support"
- depends on BKL # unfixable, just use autofs4
- help
- The automounter is a tool to automatically mount remote file systems
- on demand. This implementation is partially kernel-based to reduce
- overhead in the already-mounted case; this is unlike the BSD
- automounter (amd), which is a pure user space daemon.
-
- To use the automounter you need the user-space tools from the autofs
- package; you can find the location in <file:Documentation/Changes>.
- You also want to answer Y to "NFS file system support", below.
-
- If you want to use the newer version of the automounter with more
- features, say N here and say Y to "Kernel automounter v4 support",
- below.
-
- To compile this support as a module, choose M here: the module will be
- called autofs.
-
- If you are not a part of a fairly large, distributed network, you
- probably do not need an automounter, and can say N here.
diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile
deleted file mode 100644
index 453a60f46d05..000000000000
--- a/fs/autofs/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the linux autofs-filesystem routines.
-#
-
-obj-$(CONFIG_AUTOFS_FS) += autofs.o
-
-autofs-objs := dirhash.o init.o inode.o root.o symlink.o waitq.o
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
deleted file mode 100644
index 901a3e67ec45..000000000000
--- a/fs/autofs/autofs_i.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *
- * linux/fs/autofs/autofs_i.h
- *
- * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/* Internal header file for autofs */
-
-#include <linux/auto_fs.h>
-
-/* This is the range of ioctl() numbers we claim as ours */
-#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
-#define AUTOFS_IOC_COUNT 32
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <linux/wait.h>
-#include <linux/dcache.h>
-#include <linux/namei.h>
-#include <linux/mount.h>
-#include <linux/sched.h>
-
-#include <asm/current.h>
-#include <asm/uaccess.h>
-
-#ifdef DEBUG
-#define DPRINTK(D) (printk D)
-#else
-#define DPRINTK(D) ((void)0)
-#endif
-
-/*
- * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
- * kernel will keep the negative response cached for up to the time given
- * here, although the time can be shorter if the kernel throws the dcache
- * entry away. This probably should be settable from user space.
- */
-#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ) /* 1 minute */
-
-/* Structures associated with the root directory hash table */
-
-#define AUTOFS_HASH_SIZE 67
-
-struct autofs_dir_ent {
- int hash;
- char *name;
- int len;
- ino_t ino;
- struct dentry *dentry;
- /* Linked list of entries */
- struct autofs_dir_ent *next;
- struct autofs_dir_ent **back;
- /* The following entries are for the expiry system */
- unsigned long last_usage;
- struct list_head exp;
-};
-
-struct autofs_dirhash {
- struct autofs_dir_ent *h[AUTOFS_HASH_SIZE];
- struct list_head expiry_head;
-};
-
-struct autofs_wait_queue {
- wait_queue_head_t queue;
- struct autofs_wait_queue *next;
- autofs_wqt_t wait_queue_token;
- /* We use the following to see what we are waiting for */
- int hash;
- int len;
- char *name;
- /* This is for status reporting upon return */
- int status;
- int wait_ctr;
-};
-
-struct autofs_symlink {
- char *data;
- int len;
- time_t mtime;
-};
-
-#define AUTOFS_MAX_SYMLINKS 256
-
-#define AUTOFS_ROOT_INO 1
-#define AUTOFS_FIRST_SYMLINK 2
-#define AUTOFS_FIRST_DIR_INO (AUTOFS_FIRST_SYMLINK+AUTOFS_MAX_SYMLINKS)
-
-#define AUTOFS_SYMLINK_BITMAP_LEN \
- ((AUTOFS_MAX_SYMLINKS+((sizeof(long)*1)-1))/(sizeof(long)*8))
-
-#define AUTOFS_SBI_MAGIC 0x6d4a556d
-
-struct autofs_sb_info {
- u32 magic;
- struct file *pipe;
- struct pid *oz_pgrp;
- int catatonic;
- struct super_block *sb;
- unsigned long exp_timeout;
- ino_t next_dir_ino;
- struct autofs_wait_queue *queues; /* Wait queue pointer */
- struct autofs_dirhash dirhash; /* Root directory hash */
- struct autofs_symlink symlink[AUTOFS_MAX_SYMLINKS];
- unsigned long symlink_bitmap[AUTOFS_SYMLINK_BITMAP_LEN];
-};
-
-static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
-{
- return (struct autofs_sb_info *)(sb->s_fs_info);
-}
-
-/* autofs_oz_mode(): do we see the man behind the curtain? (The
- processes which do manipulations for us in user space sees the raw
- filesystem without "magic".) */
-
-static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
- return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
-}
-
-/* Hash operations */
-
-void autofs_initialize_hash(struct autofs_dirhash *);
-struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *,struct qstr *);
-void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
-void autofs_hash_delete(struct autofs_dir_ent *);
-struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
-void autofs_hash_dputall(struct autofs_dirhash *);
-void autofs_hash_nuke(struct autofs_sb_info *);
-
-/* Expiration-handling functions */
-
-void autofs_update_usage(struct autofs_dirhash *,struct autofs_dir_ent *);
-struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info *, struct vfsmount *mnt);
-
-/* Operations structures */
-
-extern const struct inode_operations autofs_root_inode_operations;
-extern const struct inode_operations autofs_symlink_inode_operations;
-extern const struct file_operations autofs_root_operations;
-
-/* Initializing function */
-
-int autofs_fill_super(struct super_block *, void *, int);
-void autofs_kill_sb(struct super_block *sb);
-struct inode *autofs_iget(struct super_block *, unsigned long);
-
-/* Queue management functions */
-
-int autofs_wait(struct autofs_sb_info *,struct qstr *);
-int autofs_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
-void autofs_catatonic_mode(struct autofs_sb_info *);
-
-#ifdef DEBUG
-void autofs_say(const char *name, int len);
-#else
-#define autofs_say(n,l) ((void)0)
-#endif
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
deleted file mode 100644
index e947915109e5..000000000000
--- a/fs/autofs/dirhash.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/dirhash.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include "autofs_i.h"
-
-/* Functions for maintenance of expiry queue */
-
-static void autofs_init_usage(struct autofs_dirhash *dh,
- struct autofs_dir_ent *ent)
-{
- list_add_tail(&ent->exp, &dh->expiry_head);
- ent->last_usage = jiffies;
-}
-
-static void autofs_delete_usage(struct autofs_dir_ent *ent)
-{
- list_del(&ent->exp);
-}
-
-void autofs_update_usage(struct autofs_dirhash *dh,
- struct autofs_dir_ent *ent)
-{
- autofs_delete_usage(ent); /* Unlink from current position */
- autofs_init_usage(dh,ent); /* Relink at queue tail */
-}
-
-struct autofs_dir_ent *autofs_expire(struct super_block *sb,
- struct autofs_sb_info *sbi,
- struct vfsmount *mnt)
-{
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
- unsigned long timeout = sbi->exp_timeout;
-
- while (1) {
- struct path path;
- int umount_ok;
-
- if ( list_empty(&dh->expiry_head) || sbi->catatonic )
- return NULL; /* No entries */
- /* We keep the list sorted by last_usage and want old stuff */
- ent = list_entry(dh->expiry_head.next, struct autofs_dir_ent, exp);
- if (jiffies - ent->last_usage < timeout)
- break;
- /* Move to end of list in case expiry isn't desirable */
- autofs_update_usage(dh, ent);
-
- /* Check to see that entry is expirable */
- if ( ent->ino < AUTOFS_FIRST_DIR_INO )
- return ent; /* Symlinks are always expirable */
-
- /* Get the dentry for the autofs subdirectory */
- path.dentry = ent->dentry;
-
- if (!path.dentry) {
- /* Should only happen in catatonic mode */
- printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
- autofs_delete_usage(ent);
- continue;
- }
-
- if (!path.dentry->d_inode) {
- dput(path.dentry);
- printk("autofs: negative dentry on expiry queue: %s\n",
- ent->name);
- autofs_delete_usage(ent);
- continue;
- }
-
- /* Make sure entry is mounted and unused; note that dentry will
- point to the mounted-on-top root. */
- if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
- !d_mountpoint(path.dentry)) {
- DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
- continue;
- }
- path.mnt = mnt;
- path_get(&path);
- if (!follow_down(&path)) {
- path_put(&path);
- DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
- continue;
- }
- while (d_mountpoint(path.dentry) && follow_down(&path))
- ;
- umount_ok = may_umount(path.mnt);
- path_put(&path);
-
- if (umount_ok) {
- DPRINTK(("autofs: signaling expire on %s\n", ent->name));
- return ent; /* Expirable! */
- }
- DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
- }
- return NULL; /* No expirable entries */
-}
-
-void autofs_initialize_hash(struct autofs_dirhash *dh) {
- memset(&dh->h, 0, AUTOFS_HASH_SIZE*sizeof(struct autofs_dir_ent *));
- INIT_LIST_HEAD(&dh->expiry_head);
-}
-
-struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *dh, struct qstr *name)
-{
- struct autofs_dir_ent *dhn;
-
- DPRINTK(("autofs_hash_lookup: hash = 0x%08x, name = ", name->hash));
- autofs_say(name->name,name->len);
-
- for ( dhn = dh->h[(unsigned) name->hash % AUTOFS_HASH_SIZE] ; dhn ; dhn = dhn->next ) {
- if ( name->hash == dhn->hash &&
- name->len == dhn->len &&
- !memcmp(name->name, dhn->name, name->len) )
- break;
- }
-
- return dhn;
-}
-
-void autofs_hash_insert(struct autofs_dirhash *dh, struct autofs_dir_ent *ent)
-{
- struct autofs_dir_ent **dhnp;
-
- DPRINTK(("autofs_hash_insert: hash = 0x%08x, name = ", ent->hash));
- autofs_say(ent->name,ent->len);
-
- autofs_init_usage(dh,ent);
- if (ent->dentry)
- dget(ent->dentry);
-
- dhnp = &dh->h[(unsigned) ent->hash % AUTOFS_HASH_SIZE];
- ent->next = *dhnp;
- ent->back = dhnp;
- *dhnp = ent;
- if ( ent->next )
- ent->next->back = &(ent->next);
-}
-
-void autofs_hash_delete(struct autofs_dir_ent *ent)
-{
- *(ent->back) = ent->next;
- if ( ent->next )
- ent->next->back = ent->back;
-
- autofs_delete_usage(ent);
-
- if ( ent->dentry )
- dput(ent->dentry);
- kfree(ent->name);
- kfree(ent);
-}
-
-/*
- * Used by readdir(). We must validate "ptr", so we can't simply make it
- * a pointer. Values below 0xffff are reserved; calling with any value
- * <= 0x10000 will return the first entry found.
- *
- * "last" can be NULL or the value returned by the last search *if* we
- * want the next sequential entry.
- */
-struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *dh,
- off_t *ptr, struct autofs_dir_ent *last)
-{
- int bucket, ecount, i;
- struct autofs_dir_ent *ent;
-
- bucket = (*ptr >> 16) - 1;
- ecount = *ptr & 0xffff;
-
- if ( bucket < 0 ) {
- bucket = ecount = 0;
- }
-
- DPRINTK(("autofs_hash_enum: bucket %d, entry %d\n", bucket, ecount));
-
- ent = last ? last->next : NULL;
-
- if ( ent ) {
- ecount++;
- } else {
- while ( bucket < AUTOFS_HASH_SIZE ) {
- ent = dh->h[bucket];
- for ( i = ecount ; ent && i ; i-- )
- ent = ent->next;
-
- if (ent) {
- ecount++; /* Point to *next* entry */
- break;
- }
-
- bucket++; ecount = 0;
- }
- }
-
-#ifdef DEBUG
- if ( !ent )
- printk("autofs_hash_enum: nothing found\n");
- else {
- printk("autofs_hash_enum: found hash %08x, name", ent->hash);
- autofs_say(ent->name,ent->len);
- }
-#endif
-
- *ptr = ((bucket+1) << 16) + ecount;
- return ent;
-}
-
-/* Iterate over all the ents, and remove all dentry pointers. Used on
- entering catatonic mode, in order to make the filesystem unmountable. */
-void autofs_hash_dputall(struct autofs_dirhash *dh)
-{
- int i;
- struct autofs_dir_ent *ent;
-
- for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
- for ( ent = dh->h[i] ; ent ; ent = ent->next ) {
- if ( ent->dentry ) {
- dput(ent->dentry);
- ent->dentry = NULL;
- }
- }
- }
-}
-
-/* Delete everything. This is used on filesystem destruction, so we
- make no attempt to keep the pointers valid */
-void autofs_hash_nuke(struct autofs_sb_info *sbi)
-{
- int i;
- struct autofs_dir_ent *ent, *nent;
-
- for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
- for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
- nent = ent->next;
- if ( ent->dentry )
- dput(ent->dentry);
- kfree(ent->name);
- kfree(ent);
- }
- }
-}
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
deleted file mode 100644
index cea5219b4f37..000000000000
--- a/fs/autofs/init.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/init.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include "autofs_i.h"
-
-static int autofs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-{
- return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt);
-}
-
-static struct file_system_type autofs_fs_type = {
- .owner = THIS_MODULE,
- .name = "autofs",
- .get_sb = autofs_get_sb,
- .kill_sb = autofs_kill_sb,
-};
-
-static int __init init_autofs_fs(void)
-{
- return register_filesystem(&autofs_fs_type);
-}
-
-static void __exit exit_autofs_fs(void)
-{
- unregister_filesystem(&autofs_fs_type);
-}
-
-module_init(init_autofs_fs);
-module_exit(exit_autofs_fs);
-
-#ifdef DEBUG
-void autofs_say(const char *name, int len)
-{
- printk("(%d: ", len);
- while ( len-- )
- printk("%c", *name++);
- printk(")\n");
-}
-#endif
-MODULE_LICENSE("GPL");
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
deleted file mode 100644
index e1734f2d6e26..000000000000
--- a/fs/autofs/inode.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/inode.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/file.h>
-#include <linux/parser.h>
-#include <linux/bitops.h>
-#include <linux/magic.h>
-#include "autofs_i.h"
-#include <linux/module.h>
-
-void autofs_kill_sb(struct super_block *sb)
-{
- struct autofs_sb_info *sbi = autofs_sbi(sb);
- unsigned int n;
-
- /*
- * In the event of a failure in get_sb_nodev the superblock
- * info is not present so nothing else has been setup, so
- * just call kill_anon_super when we are called from
- * deactivate_super.
- */
- if (!sbi)
- goto out_kill_sb;
-
- if (!sbi->catatonic)
- autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
-
- put_pid(sbi->oz_pgrp);
-
- autofs_hash_nuke(sbi);
- for (n = 0; n < AUTOFS_MAX_SYMLINKS; n++) {
- if (test_bit(n, sbi->symlink_bitmap))
- kfree(sbi->symlink[n].data);
- }
-
- kfree(sb->s_fs_info);
-
-out_kill_sb:
- DPRINTK(("autofs: shutting down\n"));
- kill_anon_super(sb);
-}
-
-static const struct super_operations autofs_sops = {
- .statfs = simple_statfs,
- .show_options = generic_show_options,
-};
-
-enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
-
-static const match_table_t autofs_tokens = {
- {Opt_fd, "fd=%u"},
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_pgrp, "pgrp=%u"},
- {Opt_minproto, "minproto=%u"},
- {Opt_maxproto, "maxproto=%u"},
- {Opt_err, NULL}
-};
-
-static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
- pid_t *pgrp, int *minproto, int *maxproto)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int option;
-
- *uid = current_uid();
- *gid = current_gid();
- *pgrp = task_pgrp_nr(current);
-
- *minproto = *maxproto = AUTOFS_PROTO_VERSION;
-
- *pipefd = -1;
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, autofs_tokens, args);
- switch (token) {
- case Opt_fd:
- if (match_int(&args[0], &option))
- return 1;
- *pipefd = option;
- break;
- case Opt_uid:
- if (match_int(&args[0], &option))
- return 1;
- *uid = option;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return 1;
- *gid = option;
- break;
- case Opt_pgrp:
- if (match_int(&args[0], &option))
- return 1;
- *pgrp = option;
- break;
- case Opt_minproto:
- if (match_int(&args[0], &option))
- return 1;
- *minproto = option;
- break;
- case Opt_maxproto:
- if (match_int(&args[0], &option))
- return 1;
- *maxproto = option;
- break;
- default:
- return 1;
- }
- }
- return (*pipefd < 0);
-}
-
-int autofs_fill_super(struct super_block *s, void *data, int silent)
-{
- struct inode * root_inode;
- struct dentry * root;
- struct file * pipe;
- int pipefd;
- struct autofs_sb_info *sbi;
- int minproto, maxproto;
- pid_t pgid;
-
- save_mount_options(s, data);
-
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- goto fail_unlock;
- DPRINTK(("autofs: starting up, sbi = %p\n",sbi));
-
- s->s_fs_info = sbi;
- sbi->magic = AUTOFS_SBI_MAGIC;
- sbi->pipe = NULL;
- sbi->catatonic = 1;
- sbi->exp_timeout = 0;
- autofs_initialize_hash(&sbi->dirhash);
- sbi->queues = NULL;
- memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
- sbi->next_dir_ino = AUTOFS_FIRST_DIR_INO;
- s->s_blocksize = 1024;
- s->s_blocksize_bits = 10;
- s->s_magic = AUTOFS_SUPER_MAGIC;
- s->s_op = &autofs_sops;
- s->s_time_gran = 1;
- sbi->sb = s;
-
- root_inode = autofs_iget(s, AUTOFS_ROOT_INO);
- if (IS_ERR(root_inode))
- goto fail_free;
- root = d_alloc_root(root_inode);
- pipe = NULL;
-
- if (!root)
- goto fail_iput;
-
- /* Can this call block? - WTF cares? s is locked. */
- if (parse_options(data, &pipefd, &root_inode->i_uid,
- &root_inode->i_gid, &pgid, &minproto,
- &maxproto)) {
- printk("autofs: called with bogus options\n");
- goto fail_dput;
- }
-
- /* Couldn't this be tested earlier? */
- if (minproto > AUTOFS_PROTO_VERSION ||
- maxproto < AUTOFS_PROTO_VERSION) {
- printk("autofs: kernel does not match daemon version\n");
- goto fail_dput;
- }
-
- DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, pgid));
- sbi->oz_pgrp = find_get_pid(pgid);
-
- if (!sbi->oz_pgrp) {
- printk("autofs: could not find process group %d\n", pgid);
- goto fail_dput;
- }
-
- pipe = fget(pipefd);
-
- if (!pipe) {
- printk("autofs: could not open pipe file descriptor\n");
- goto fail_put_pid;
- }
-
- if (!pipe->f_op || !pipe->f_op->write)
- goto fail_fput;
- sbi->pipe = pipe;
- sbi->catatonic = 0;
-
- /*
- * Success! Install the root dentry now to indicate completion.
- */
- s->s_root = root;
- return 0;
-
-fail_fput:
- printk("autofs: pipe file descriptor does not contain proper ops\n");
- fput(pipe);
-fail_put_pid:
- put_pid(sbi->oz_pgrp);
-fail_dput:
- dput(root);
- goto fail_free;
-fail_iput:
- printk("autofs: get root dentry failed\n");
- iput(root_inode);
-fail_free:
- kfree(sbi);
- s->s_fs_info = NULL;
-fail_unlock:
- return -EINVAL;
-}
-
-struct inode *autofs_iget(struct super_block *sb, unsigned long ino)
-{
- unsigned int n;
- struct autofs_sb_info *sbi = autofs_sbi(sb);
- struct inode *inode;
-
- inode = iget_locked(sb, ino);
- if (!inode)
- return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
-
- /* Initialize to the default case (stub directory) */
-
- inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
- inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
- inode->i_nlink = 2;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-
- if (ino == AUTOFS_ROOT_INO) {
- inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
- inode->i_op = &autofs_root_inode_operations;
- inode->i_fop = &autofs_root_operations;
- goto done;
- }
-
- inode->i_uid = inode->i_sb->s_root->d_inode->i_uid;
- inode->i_gid = inode->i_sb->s_root->d_inode->i_gid;
-
- if (ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO) {
- /* Symlink inode - should be in symlink list */
- struct autofs_symlink *sl;
-
- n = ino - AUTOFS_FIRST_SYMLINK;
- if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) {
- printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino);
- goto done;
- }
-
- inode->i_op = &autofs_symlink_inode_operations;
- sl = &sbi->symlink[n];
- inode->i_private = sl;
- inode->i_mode = S_IFLNK | S_IRWXUGO;
- inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime;
- inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
- inode->i_size = sl->len;
- inode->i_nlink = 1;
- }
-
-done:
- unlock_new_inode(inode);
- return inode;
-}
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
deleted file mode 100644
index 0c4ca81aeaeb..000000000000
--- a/fs/autofs/root.c
+++ /dev/null
@@ -1,645 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/root.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/stat.h>
-#include <linux/slab.h>
-#include <linux/param.h>
-#include <linux/time.h>
-#include <linux/compat.h>
-#include <linux/smp_lock.h>
-#include "autofs_i.h"
-
-static int autofs_root_readdir(struct file *,void *,filldir_t);
-static struct dentry *autofs_root_lookup(struct inode *,struct dentry *, struct nameidata *);
-static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
-static int autofs_root_unlink(struct inode *,struct dentry *);
-static int autofs_root_rmdir(struct inode *,struct dentry *);
-static int autofs_root_mkdir(struct inode *,struct dentry *,int);
-static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
-#ifdef CONFIG_COMPAT
-static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
-#endif
-
-const struct file_operations autofs_root_operations = {
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = autofs_root_readdir,
- .unlocked_ioctl = autofs_root_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = autofs_root_compat_ioctl,
-#endif
-};
-
-const struct inode_operations autofs_root_inode_operations = {
- .lookup = autofs_root_lookup,
- .unlink = autofs_root_unlink,
- .symlink = autofs_root_symlink,
- .mkdir = autofs_root_mkdir,
- .rmdir = autofs_root_rmdir,
-};
-
-static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
- struct autofs_dir_ent *ent = NULL;
- struct autofs_dirhash *dirhash;
- struct autofs_sb_info *sbi;
- struct inode * inode = filp->f_path.dentry->d_inode;
- off_t onr, nr;
-
- lock_kernel();
-
- sbi = autofs_sbi(inode->i_sb);
- dirhash = &sbi->dirhash;
- nr = filp->f_pos;
-
- switch(nr)
- {
- case 0:
- if (filldir(dirent, ".", 1, nr, inode->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos = ++nr;
- /* fall through */
- case 1:
- if (filldir(dirent, "..", 2, nr, inode->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos = ++nr;
- /* fall through */
- default:
- while (onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent)) {
- if (!ent->dentry || d_mountpoint(ent->dentry)) {
- if (filldir(dirent,ent->name,ent->len,onr,ent->ino,DT_UNKNOWN) < 0)
- goto out;
- filp->f_pos = nr;
- }
- }
- break;
- }
-
-out:
- unlock_kernel();
- return 0;
-}
-
-static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, struct autofs_sb_info *sbi)
-{
- struct inode * inode;
- struct autofs_dir_ent *ent;
- int status = 0;
-
- if (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name))) {
- do {
- if (status && dentry->d_inode) {
- if (status != -ENOENT)
- printk("autofs warning: lookup failure on positive dentry, status = %d, name = %s\n", status, dentry->d_name.name);
- return 0; /* Try to get the kernel to invalidate this dentry */
- }
-
- /* Turn this into a real negative dentry? */
- if (status == -ENOENT) {
- dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
- dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
- return 1;
- } else if (status) {
- /* Return a negative dentry, but leave it "pending" */
- return 1;
- }
- status = autofs_wait(sbi, &dentry->d_name);
- } while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)));
- }
-
- /* Abuse this field as a pointer to the directory entry, used to
- find the expire list pointers */
- dentry->d_time = (unsigned long) ent;
-
- if (!dentry->d_inode) {
- inode = autofs_iget(sb, ent->ino);
- if (IS_ERR(inode)) {
- /* Failed, but leave pending for next time */
- return 1;
- }
- dentry->d_inode = inode;
- }
-
- /* If this is a directory that isn't a mount point, bitch at the
- daemon and fix it in user space */
- if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
- return !autofs_wait(sbi, &dentry->d_name);
- }
-
- /* We don't update the usages for the autofs daemon itself, this
- is necessary for recursive autofs mounts */
- if (!autofs_oz_mode(sbi)) {
- autofs_update_usage(&sbi->dirhash,ent);
- }
-
- dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
- return 1;
-}
-
-
-/*
- * Revalidate is called on every cache lookup. Some of those
- * cache lookups may actually happen while the dentry is not
- * yet completely filled in, and revalidate has to delay such
- * lookups..
- */
-static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
-{
- struct inode * dir;
- struct autofs_sb_info *sbi;
- struct autofs_dir_ent *ent;
- int res;
-
- lock_kernel();
- dir = dentry->d_parent->d_inode;
- sbi = autofs_sbi(dir->i_sb);
-
- /* Pending dentry */
- if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
- if (autofs_oz_mode(sbi))
- res = 1;
- else
- res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
- unlock_kernel();
- return res;
- }
-
- /* Negative dentry.. invalidate if "old" */
- if (!dentry->d_inode) {
- unlock_kernel();
- return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT);
- }
-
- /* Check for a non-mountpoint directory */
- if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
- if (autofs_oz_mode(sbi))
- res = 1;
- else
- res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
- unlock_kernel();
- return res;
- }
-
- /* Update the usage list */
- if (!autofs_oz_mode(sbi)) {
- ent = (struct autofs_dir_ent *) dentry->d_time;
- if (ent)
- autofs_update_usage(&sbi->dirhash,ent);
- }
- unlock_kernel();
- return 1;
-}
-
-static const struct dentry_operations autofs_dentry_operations = {
- .d_revalidate = autofs_revalidate,
-};
-
-static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
- struct autofs_sb_info *sbi;
- int oz_mode;
-
- DPRINTK(("autofs_root_lookup: name = "));
- lock_kernel();
- autofs_say(dentry->d_name.name,dentry->d_name.len);
-
- if (dentry->d_name.len > NAME_MAX) {
- unlock_kernel();
- return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */
- }
-
- sbi = autofs_sbi(dir->i_sb);
-
- oz_mode = autofs_oz_mode(sbi);
- DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
- "oz_mode = %d\n", task_pid_nr(current),
- task_pgrp_nr(current), sbi->catatonic,
- oz_mode));
-
- /*
- * Mark the dentry incomplete, but add it. This is needed so
- * that the VFS layer knows about the dentry, and we can count
- * on catching any lookups through the revalidate.
- *
- * Let all the hard work be done by the revalidate function that
- * needs to be able to do this anyway..
- *
- * We need to do this before we release the directory semaphore.
- */
- dentry->d_op = &autofs_dentry_operations;
- dentry->d_flags |= DCACHE_AUTOFS_PENDING;
- d_add(dentry, NULL);
-
- mutex_unlock(&dir->i_mutex);
- autofs_revalidate(dentry, nd);
- mutex_lock(&dir->i_mutex);
-
- /*
- * If we are still pending, check if we had to handle
- * a signal. If so we can force a restart..
- */
- if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
- /* See if we were interrupted */
- if (signal_pending(current)) {
- sigset_t *sigset = &current->pending.signal;
- if (sigismember (sigset, SIGKILL) ||
- sigismember (sigset, SIGQUIT) ||
- sigismember (sigset, SIGINT)) {
- unlock_kernel();
- return ERR_PTR(-ERESTARTNOINTR);
- }
- }
- }
- unlock_kernel();
-
- /*
- * If this dentry is unhashed, then we shouldn't honour this
- * lookup even if the dentry is positive. Returning ENOENT here
- * doesn't do the right thing for all system calls, but it should
- * be OK for the operations we permit from an autofs.
- */
- if (dentry->d_inode && d_unhashed(dentry))
- return ERR_PTR(-ENOENT);
-
- return NULL;
-}
-
-static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-{
- struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
- unsigned int n;
- int slsize;
- struct autofs_symlink *sl;
- struct inode *inode;
-
- DPRINTK(("autofs_root_symlink: %s <- ", symname));
- autofs_say(dentry->d_name.name,dentry->d_name.len);
-
- lock_kernel();
- if (!autofs_oz_mode(sbi)) {
- unlock_kernel();
- return -EACCES;
- }
-
- if (autofs_hash_lookup(dh, &dentry->d_name)) {
- unlock_kernel();
- return -EEXIST;
- }
-
- n = find_first_zero_bit(sbi->symlink_bitmap,AUTOFS_MAX_SYMLINKS);
- if (n >= AUTOFS_MAX_SYMLINKS) {
- unlock_kernel();
- return -ENOSPC;
- }
-
- set_bit(n,sbi->symlink_bitmap);
- sl = &sbi->symlink[n];
- sl->len = strlen(symname);
- sl->data = kmalloc(slsize = sl->len+1, GFP_KERNEL);
- if (!sl->data) {
- clear_bit(n,sbi->symlink_bitmap);
- unlock_kernel();
- return -ENOSPC;
- }
-
- ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
- if (!ent) {
- kfree(sl->data);
- clear_bit(n,sbi->symlink_bitmap);
- unlock_kernel();
- return -ENOSPC;
- }
-
- ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
- if (!ent->name) {
- kfree(sl->data);
- kfree(ent);
- clear_bit(n,sbi->symlink_bitmap);
- unlock_kernel();
- return -ENOSPC;
- }
-
- memcpy(sl->data,symname,slsize);
- sl->mtime = get_seconds();
-
- ent->ino = AUTOFS_FIRST_SYMLINK + n;
- ent->hash = dentry->d_name.hash;
- memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
- ent->dentry = NULL; /* We don't keep the dentry for symlinks */
-
- autofs_hash_insert(dh,ent);
-
- inode = autofs_iget(dir->i_sb, ent->ino);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- d_instantiate(dentry, inode);
- unlock_kernel();
- return 0;
-}
-
-/*
- * NOTE!
- *
- * Normal filesystems would do a "d_delete()" to tell the VFS dcache
- * that the file no longer exists. However, doing that means that the
- * VFS layer can turn the dentry into a negative dentry, which we
- * obviously do not want (we're dropping the entry not because it
- * doesn't exist, but because it has timed out).
- *
- * Also see autofs_root_rmdir()..
- */
-static int autofs_root_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
- unsigned int n;
-
- /* This allows root to remove symlinks */
- lock_kernel();
- if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) {
- unlock_kernel();
- return -EACCES;
- }
-
- ent = autofs_hash_lookup(dh, &dentry->d_name);
- if (!ent) {
- unlock_kernel();
- return -ENOENT;
- }
-
- n = ent->ino - AUTOFS_FIRST_SYMLINK;
- if (n >= AUTOFS_MAX_SYMLINKS) {
- unlock_kernel();
- return -EISDIR; /* It's a directory, dummy */
- }
- if (!test_bit(n,sbi->symlink_bitmap)) {
- unlock_kernel();
- return -EINVAL; /* Nonexistent symlink? Shouldn't happen */
- }
-
- dentry->d_time = (unsigned long)(struct autofs_dirhash *)NULL;
- autofs_hash_delete(ent);
- clear_bit(n,sbi->symlink_bitmap);
- kfree(sbi->symlink[n].data);
- d_drop(dentry);
-
- unlock_kernel();
- return 0;
-}
-
-static int autofs_root_rmdir(struct inode *dir, struct dentry *dentry)
-{
- struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
-
- lock_kernel();
- if (!autofs_oz_mode(sbi)) {
- unlock_kernel();
- return -EACCES;
- }
-
- ent = autofs_hash_lookup(dh, &dentry->d_name);
- if (!ent) {
- unlock_kernel();
- return -ENOENT;
- }
-
- if ((unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO) {
- unlock_kernel();
- return -ENOTDIR; /* Not a directory */
- }
-
- if (ent->dentry != dentry) {
- printk("autofs_rmdir: odentry != dentry for entry %s\n", dentry->d_name.name);
- }
-
- dentry->d_time = (unsigned long)(struct autofs_dir_ent *)NULL;
- autofs_hash_delete(ent);
- drop_nlink(dir);
- d_drop(dentry);
- unlock_kernel();
-
- return 0;
-}
-
-static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_dirhash *dh = &sbi->dirhash;
- struct autofs_dir_ent *ent;
- struct inode *inode;
- ino_t ino;
-
- lock_kernel();
- if (!autofs_oz_mode(sbi)) {
- unlock_kernel();
- return -EACCES;
- }
-
- ent = autofs_hash_lookup(dh, &dentry->d_name);
- if (ent) {
- unlock_kernel();
- return -EEXIST;
- }
-
- if (sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO) {
- printk("autofs: Out of inode numbers -- what the heck did you do??\n");
- unlock_kernel();
- return -ENOSPC;
- }
- ino = sbi->next_dir_ino++;
-
- ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
- if (!ent) {
- unlock_kernel();
- return -ENOSPC;
- }
-
- ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
- if (!ent->name) {
- kfree(ent);
- unlock_kernel();
- return -ENOSPC;
- }
-
- ent->hash = dentry->d_name.hash;
- memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
- ent->ino = ino;
- ent->dentry = dentry;
- autofs_hash_insert(dh,ent);
-
- inc_nlink(dir);
-
- inode = autofs_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- drop_nlink(dir);
- return PTR_ERR(inode);
- }
-
- d_instantiate(dentry, inode);
- unlock_kernel();
-
- return 0;
-}
-
-/* Get/set timeout ioctl() operation */
-#ifdef CONFIG_COMPAT
-static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
- unsigned int __user *p)
-{
- unsigned long ntimeout;
-
- if (get_user(ntimeout, p) ||
- put_user(sbi->exp_timeout / HZ, p))
- return -EFAULT;
-
- if (ntimeout > UINT_MAX/HZ)
- sbi->exp_timeout = 0;
- else
- sbi->exp_timeout = ntimeout * HZ;
-
- return 0;
-}
-#endif
-
-static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
- unsigned long __user *p)
-{
- unsigned long ntimeout;
-
- if (get_user(ntimeout, p) ||
- put_user(sbi->exp_timeout / HZ, p))
- return -EFAULT;
-
- if (ntimeout > ULONG_MAX/HZ)
- sbi->exp_timeout = 0;
- else
- sbi->exp_timeout = ntimeout * HZ;
-
- return 0;
-}
-
-/* Return protocol version */
-static inline int autofs_get_protover(int __user *p)
-{
- return put_user(AUTOFS_PROTO_VERSION, p);
-}
-
-/* Perform an expiry operation */
-static inline int autofs_expire_run(struct super_block *sb,
- struct autofs_sb_info *sbi,
- struct vfsmount *mnt,
- struct autofs_packet_expire __user *pkt_p)
-{
- struct autofs_dir_ent *ent;
- struct autofs_packet_expire pkt;
-
- memset(&pkt,0,sizeof pkt);
-
- pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
- pkt.hdr.type = autofs_ptype_expire;
-
- if (!sbi->exp_timeout || !(ent = autofs_expire(sb,sbi,mnt)))
- return -EAGAIN;
-
- pkt.len = ent->len;
- memcpy(pkt.name, ent->name, pkt.len);
- pkt.name[pkt.len] = '\0';
-
- if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
- return -EFAULT;
-
- return 0;
-}
-
-/*
- * ioctl()'s on the root directory is the chief method for the daemon to
- * generate kernel reactions
- */
-static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
- void __user *argp = (void __user *)arg;
-
- DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));
-
- if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
- _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
- return -ENOTTY;
-
- if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- switch(cmd) {
- case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */
- return autofs_wait_release(sbi,(autofs_wqt_t)arg,0);
- case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */
- return autofs_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT);
- case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */
- autofs_catatonic_mode(sbi);
- return 0;
- case AUTOFS_IOC_PROTOVER: /* Get protocol version */
- return autofs_get_protover(argp);
-#ifdef CONFIG_COMPAT
- case AUTOFS_IOC_SETTIMEOUT32:
- return autofs_compat_get_set_timeout(sbi, argp);
-#endif
- case AUTOFS_IOC_SETTIMEOUT:
- return autofs_get_set_timeout(sbi, argp);
- case AUTOFS_IOC_EXPIRE:
- return autofs_expire_run(inode->i_sb, sbi, filp->f_path.mnt,
- argp);
- default:
- return -ENOSYS;
- }
-
-}
-
-static long autofs_root_ioctl(struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- int ret;
-
- lock_kernel();
- ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
- filp, cmd, arg);
- unlock_kernel();
-
- return ret;
-}
-
-#ifdef CONFIG_COMPAT
-static long autofs_root_compat_ioctl(struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = filp->f_path.dentry->d_inode;
- int ret;
-
- lock_kernel();
- if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
- ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
- else
- ret = autofs_do_root_ioctl(inode, filp, cmd,
- (unsigned long)compat_ptr(arg));
- unlock_kernel();
-
- return ret;
-}
-#endif
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
deleted file mode 100644
index 7ce9cb2c9ce2..000000000000
--- a/fs/autofs/symlink.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/symlink.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include "autofs_i.h"
-
-/* Nothing to release.. */
-static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data;
- nd_set_link(nd, s);
- return NULL;
-}
-
-const struct inode_operations autofs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = autofs_follow_link
-};
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
deleted file mode 100644
index be46805972f0..000000000000
--- a/fs/autofs/waitq.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/waitq.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/signal.h>
-#include <linux/file.h>
-#include "autofs_i.h"
-
-/* We make this a static variable rather than a part of the superblock; it
- is better if we don't reassign numbers easily even across filesystems */
-static autofs_wqt_t autofs_next_wait_queue = 1;
-
-/* These are the signals we allow interrupting a pending mount */
-#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
-
-void autofs_catatonic_mode(struct autofs_sb_info *sbi)
-{
- struct autofs_wait_queue *wq, *nwq;
-
- DPRINTK(("autofs: entering catatonic mode\n"));
-
- sbi->catatonic = 1;
- wq = sbi->queues;
- sbi->queues = NULL; /* Erase all wait queues */
- while ( wq ) {
- nwq = wq->next;
- wq->status = -ENOENT; /* Magic is gone - report failure */
- kfree(wq->name);
- wq->name = NULL;
- wake_up(&wq->queue);
- wq = nwq;
- }
- fput(sbi->pipe); /* Close the pipe */
- sbi->pipe = NULL;
- autofs_hash_dputall(&sbi->dirhash); /* Remove all dentry pointers */
-}
-
-static int autofs_write(struct file *file, const void *addr, int bytes)
-{
- unsigned long sigpipe, flags;
- mm_segment_t fs;
- const char *data = (const char *)addr;
- ssize_t wr = 0;
-
- /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/
-
- sigpipe = sigismember(&current->pending.signal, SIGPIPE);
-
- /* Save pointer to user space and point back to kernel space */
- fs = get_fs();
- set_fs(KERNEL_DS);
-
- while (bytes &&
- (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
- data += wr;
- bytes -= wr;
- }
-
- set_fs(fs);
-
- /* Keep the currently executing process from receiving a
- SIGPIPE unless it was already supposed to get one */
- if (wr == -EPIPE && !sigpipe) {
- spin_lock_irqsave(&current->sighand->siglock, flags);
- sigdelset(&current->pending.signal, SIGPIPE);
- recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
- }
-
- return (bytes > 0);
-}
-
-static void autofs_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq)
-{
- struct autofs_packet_missing pkt;
-
- DPRINTK(("autofs_wait: wait id = 0x%08lx, name = ", wq->wait_queue_token));
- autofs_say(wq->name,wq->len);
-
- memset(&pkt,0,sizeof pkt); /* For security reasons */
-
- pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
- pkt.hdr.type = autofs_ptype_missing;
- pkt.wait_queue_token = wq->wait_queue_token;
- pkt.len = wq->len;
- memcpy(pkt.name, wq->name, pkt.len);
- pkt.name[pkt.len] = '\0';
-
- if ( autofs_write(sbi->pipe,&pkt,sizeof(struct autofs_packet_missing)) )
- autofs_catatonic_mode(sbi);
-}
-
-int autofs_wait(struct autofs_sb_info *sbi, struct qstr *name)
-{
- struct autofs_wait_queue *wq;
- int status;
-
- /* In catatonic mode, we don't wait for nobody */
- if ( sbi->catatonic )
- return -ENOENT;
-
- /* We shouldn't be able to get here, but just in case */
- if ( name->len > NAME_MAX )
- return -ENOENT;
-
- for ( wq = sbi->queues ; wq ; wq = wq->next ) {
- if ( wq->hash == name->hash &&
- wq->len == name->len &&
- wq->name && !memcmp(wq->name,name->name,name->len) )
- break;
- }
-
- if ( !wq ) {
- /* Create a new wait queue */
- wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
- if ( !wq )
- return -ENOMEM;
-
- wq->name = kmalloc(name->len,GFP_KERNEL);
- if ( !wq->name ) {
- kfree(wq);
- return -ENOMEM;
- }
- wq->wait_queue_token = autofs_next_wait_queue++;
- init_waitqueue_head(&wq->queue);
- wq->hash = name->hash;
- wq->len = name->len;
- wq->status = -EINTR; /* Status return if interrupted */
- memcpy(wq->name, name->name, name->len);
- wq->next = sbi->queues;
- sbi->queues = wq;
-
- /* autofs_notify_daemon() may block */
- wq->wait_ctr = 2;
- autofs_notify_daemon(sbi,wq);
- } else
- wq->wait_ctr++;
-
- /* wq->name is NULL if and only if the lock is already released */
-
- if ( sbi->catatonic ) {
- /* We might have slept, so check again for catatonic mode */
- wq->status = -ENOENT;
- kfree(wq->name);
- wq->name = NULL;
- }
-
- if ( wq->name ) {
- /* Block all but "shutdown" signals while waiting */
- sigset_t sigmask;
-
- siginitsetinv(&sigmask, SHUTDOWN_SIGS);
- sigprocmask(SIG_BLOCK, &sigmask, &sigmask);
-
- interruptible_sleep_on(&wq->queue);
-
- sigprocmask(SIG_SETMASK, &sigmask, NULL);
- } else {
- DPRINTK(("autofs_wait: skipped sleeping\n"));
- }
-
- status = wq->status;
-
- if ( ! --wq->wait_ctr ) /* Are we the last process to need status? */
- kfree(wq);
-
- return status;
-}
-
-
-int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
-{
- struct autofs_wait_queue *wq, **wql;
-
- for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
- if ( wq->wait_queue_token == wait_queue_token )
- break;
- }
- if ( !wq )
- return -EINVAL;
-
- *wql = wq->next; /* Unlink from chain */
- kfree(wq->name);
- wq->name = NULL; /* Do not wait on this queue */
-
- wq->status = status;
-
- if ( ! --wq->wait_ctr ) /* Is anyone still waiting for this guy? */
- kfree(wq);
- else
- wake_up(&wq->queue);
-
- return 0;
-}
-
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 9722e4bd8957..c038727b4050 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,16 +14,16 @@
#include <linux/init.h>
#include "autofs_i.h"
-static int autofs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *autofs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, autofs4_fill_super);
}
static struct file_system_type autofs_fs_type = {
.owner = THIS_MODULE,
.name = "autofs",
- .get_sb = autofs_get_sb,
+ .mount = autofs_mount,
.kill_sb = autofs4_kill_sb,
};
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dc39d2824885..aa4e7c7ae3c6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -913,18 +913,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static int
-befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+static struct dentry *
+befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name,
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super);
}
static struct file_system_type befs_fs_type = {
.owner = THIS_MODULE,
.name = "befs",
- .get_sb = befs_get_sb,
+ .mount = befs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 883e77acd5a8..76db6d7d49bb 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -450,16 +450,16 @@ out:
return ret;
}
-static int bfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
}
static struct file_system_type bfs_fs_type = {
.owner = THIS_MODULE,
.name = "bfs",
- .get_sb = bfs_get_sb,
+ .mount = bfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 29990f0eee0c..1befe2ec8186 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -706,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
return err;
}
-static int bm_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bm_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, bm_fill_super, mnt);
+ return mount_single(fs_type, flags, data, bm_fill_super);
}
static struct linux_binfmt misc_format = {
@@ -720,7 +720,7 @@ static struct linux_binfmt misc_format = {
static struct file_system_type bm_fs_type = {
.owner = THIS_MODULE,
.name = "binfmt_misc",
- .get_sb = bm_get_sb,
+ .mount = bm_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/bio.c b/fs/bio.c
index 8abb2dfb2e7c..4bd454fa844e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
{
struct bio *bio;
+ if (nr_iovecs > UIO_MAXIOV)
+ return NULL;
+
bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
gfp_mask);
if (unlikely(!bio))
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd)
static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
gfp_t gfp_mask)
{
- struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
+ struct bio_map_data *bmd;
+ if (iov_count > UIO_MAXIOV)
+ return NULL;
+
+ bmd = kmalloc(sizeof(*bmd), gfp_mask);
if (!bmd)
return NULL;
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
start = uaddr >> PAGE_SHIFT;
+ /*
+ * Overflow, abort
+ */
+ if (end < start)
+ return ERR_PTR(-EINVAL);
+
nr_pages += end - start;
len += iov[i].iov_len;
}
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
+ /*
+ * Overflow, abort
+ */
+ if (end < start)
+ return ERR_PTR(-EINVAL);
+
nr_pages += end - start;
/*
* buffer must be aligned to at least hardsector size for now
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
unsigned long start = uaddr >> PAGE_SHIFT;
const int local_nr_pages = end - start;
const int page_limit = cur_page + local_nr_pages;
-
+
ret = get_user_pages_fast(uaddr, local_nr_pages,
write_to_vm, &pages[cur_page]);
if (ret < local_nr_pages) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dea3b628a6ce..06e8ff12b97c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -464,15 +464,15 @@ static const struct super_operations bdev_sops = {
.evict_inode = bdev_evict_inode,
};
-static int bd_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bd_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
+ return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
}
static struct file_system_type bd_type = {
.name = "bdev",
- .get_sb = bd_get_sb,
+ .mount = bd_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b3a8a2..7845d1f7d1d9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -163,7 +163,6 @@ fail:
*/
static void end_compressed_bio_read(struct bio *bio, int err)
{
- struct extent_io_tree *tree;
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
struct page *page;
@@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
- tree = &BTRFS_I(inode)->io_tree;
ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
cb->start,
cb->orig_bio->bi_io_vec,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14ce2cc2..9ac171599258 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid)
{
struct extent_buffer *cow;
- u32 nritems;
int ret = 0;
int level;
struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
level = btrfs_header_level(buf);
- nritems = btrfs_header_nritems(buf);
if (level == 0)
btrfs_item_key(buf, &disk_key, 0);
else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
int wret;
int pslot;
int orig_slot = path->slots[level];
- int err_on_enospc = 0;
u64 orig_ptr;
if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
return 0;
- if (btrfs_header_nritems(mid) < 2)
- err_on_enospc = 1;
+ btrfs_header_nritems(mid);
left = read_node_slot(root, parent, pslot - 1);
if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
wret = push_node_left(trans, root, left, mid, 1);
if (wret < 0)
ret = wret;
- if (btrfs_header_nritems(mid) < 2)
- err_on_enospc = 1;
+ btrfs_header_nritems(mid);
}
/*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
int wret;
int pslot;
int orig_slot = path->slots[level];
- u64 orig_ptr;
if (level == 0)
return 1;
mid = path->nodes[level];
WARN_ON(btrfs_header_generation(mid) != trans->transid);
- orig_ptr = btrfs_node_blockptr(mid, orig_slot);
if (level < BTRFS_MAX_LEVEL - 1)
parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
blocksize = btrfs_level_size(root, level - 1);
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
- if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
- /*
- * we found an up to date block without sleeping, return
- * right away
- */
- *eb_ret = tmp;
- return 0;
+ if (tmp) {
+ if (btrfs_buffer_uptodate(tmp, 0)) {
+ if (btrfs_buffer_uptodate(tmp, gen)) {
+ /*
+ * we found an up to date block without
+ * sleeping, return
+ * right away
+ */
+ *eb_ret = tmp;
+ return 0;
+ }
+ /* the pages were up to date, but we failed
+ * the generation number check. Do a full
+ * read for the generation number that is correct.
+ * We must do this without dropping locks so
+ * we can trust our generation number
+ */
+ free_extent_buffer(tmp);
+ tmp = read_tree_block(root, blocknr, blocksize, gen);
+ if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ *eb_ret = tmp;
+ return 0;
+ }
+ free_extent_buffer(tmp);
+ btrfs_release_path(NULL, p);
+ return -EIO;
+ }
}
/*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
btrfs_unlock_up_safe(p, level + 1);
btrfs_set_path_blocking(p);
- if (tmp)
- free_extent_buffer(tmp);
+ free_extent_buffer(tmp);
if (p->reada)
reada_for_search(root, p, level, slot, key->objectid);
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
- int slot;
int i;
int push_space = 0;
int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
u32 this_item_size;
u32 old_left_item_size;
- slot = path->slots[1];
-
if (empty)
nr = min(right_nritems, max_slot);
else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
{
int ret = 0;
int slot;
- int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
unsigned int size_diff;
int i;
- slot_orig = path->slots[0];
leaf = path->nodes[0];
slot = path->slots[0];
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
{
int ret = 0;
int slot;
- int slot_orig;
struct extent_buffer *leaf;
struct btrfs_item *item;
u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
unsigned int old_size;
int i;
- slot_orig = path->slots[0];
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_key *cpu_key, u32 *data_size,
int nr)
{
- struct extent_buffer *leaf;
int ret = 0;
int slot;
int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
- leaf = path->nodes[0];
slot = path->slots[0];
BUG_ON(slot < 0);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286abad17..8db9234f6b41 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
*/
#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
/* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
@@ -265,6 +268,22 @@ struct btrfs_chunk {
/* additional stripes go here */
} __attribute__ ((__packed__));
+#define BTRFS_FREE_SPACE_EXTENT 1
+#define BTRFS_FREE_SPACE_BITMAP 2
+
+struct btrfs_free_space_entry {
+ __le64 offset;
+ __le64 bytes;
+ u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+ struct btrfs_disk_key location;
+ __le64 generation;
+ __le64 num_entries;
+ __le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
char label[BTRFS_LABEL_SIZE];
+ __le64 cache_generation;
+
/* future expansion */
- __le64 reserved[32];
+ __le64 reserved[31];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
} __attribute__ ((__packed__));
@@ -375,13 +396,15 @@ struct btrfs_super_block {
* ones specified below then we will fail to mount
*/
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
-#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
-#define BTRFS_FEATURE_INCOMPAT_SUPP \
- (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
- BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
+#define BTRFS_FEATURE_INCOMPAT_SUPP \
+ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
+ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
struct btrfs_space_info {
u64 flags;
- u64 total_bytes; /* total bytes in the space */
+ u64 total_bytes; /* total bytes in the space,
+ this doesn't take mirrors into account */
u64 bytes_used; /* total bytes used,
this does't take mirrors into account */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 disk_used; /* total bytes used on disk */
+ u64 disk_total; /* total bytes on disk, takes mirrors into
+ account */
int full; /* indicates that we cannot allocate any more
chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
BTRFS_CACHE_FINISHED = 2,
};
+enum btrfs_disk_cache_state {
+ BTRFS_DC_WRITTEN = 0,
+ BTRFS_DC_ERROR = 1,
+ BTRFS_DC_CLEAR = 2,
+ BTRFS_DC_SETUP = 3,
+ BTRFS_DC_NEED_WRITE = 4,
+};
+
struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
struct btrfs_fs_info *fs_info;
+ struct inode *inode;
spinlock_t lock;
u64 pinned;
u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
int extents_thresh;
int free_extents;
int total_bitmaps;
- int ro;
- int dirty;
+ int ro:1;
+ int dirty:1;
+ int iref:1;
+
+ int disk_cache_state;
/* cache tracking stuff */
int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
wait_queue_head_t transaction_wait;
+ wait_queue_head_t transaction_blocked_wait;
wait_queue_head_t async_submit_wait;
struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
struct btrfs_workers endio_meta_workers;
struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers;
+ struct btrfs_workers endio_freespace_worker;
struct btrfs_workers submit_workers;
/*
* fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
#define BTRFS_MOUNT_NOSSD (1 << 9)
#define BTRFS_MOUNT_DISCARD (1 << 10)
#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
+#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
+#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
+#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
write_eb_member(eb, item, struct btrfs_dir_item, location, key);
}
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+ num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+ num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+ generation, 64);
+
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+ struct btrfs_free_space_header *h,
+ struct btrfs_disk_key *key)
+{
+ read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+ struct btrfs_free_space_header *h,
+ struct btrfs_disk_key *key)
+{
+ write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
/* struct btrfs_disk_key */
BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
incompat_flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+ cache_generation, 64);
static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
{
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
return file->f_path.dentry;
}
+static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
+{
+ return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
+}
+
/* extent-tree.c */
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- int num_items, int *retries);
+ int num_items);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries);
+ u64 num_bytes);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+ int sync);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+ struct btrfs_trans_handle *trans, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e9103b3baa49..f0cad5ae5be7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
ret = btrfs_truncate_item(trans, root, path,
item_len - sub_item_len, 1);
}
- return 0;
+ return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5e789f4a3ed0..fb827d0d7181 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 found_start;
- int found_level;
unsigned long len;
struct extent_buffer *eb;
int ret;
@@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(1);
goto err;
}
- found_level = btrfs_header_level(eb);
-
csum_tree_block(root, eb, 0);
err:
free_extent_buffer(eb);
@@ -481,9 +478,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
end_io_wq->work.flags = 0;
if (bio->bi_rw & REQ_WRITE) {
- if (end_io_wq->metadata)
+ if (end_io_wq->metadata == 1)
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
+ else if (end_io_wq->metadata == 2)
+ btrfs_queue_worker(&fs_info->endio_freespace_worker,
+ &end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
@@ -497,6 +497,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
}
}
+/*
+ * For the metadata arg you want
+ *
+ * 0 - if data
+ * 1 - if normal metadata
+ * 2 - if writing to the free space cache area
+ */
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata)
{
@@ -533,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
static void run_one_async_start(struct btrfs_work *work)
{
- struct btrfs_fs_info *fs_info;
struct async_submit_bio *async;
async = container_of(work, struct async_submit_bio, work);
- fs_info = BTRFS_I(async->inode)->root->fs_info;
async->submit_bio_start(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
@@ -850,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u32 blocksize, u64 parent_transid)
{
struct extent_buffer *buf = NULL;
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_io_tree *io_tree;
int ret;
- io_tree = &BTRFS_I(btree_inode)->io_tree;
-
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
if (!buf)
return NULL;
@@ -1377,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio)
u64 start = 0;
struct page *page;
struct extent_io_tree *io_tree = NULL;
- struct btrfs_fs_info *info = NULL;
struct bio_vec *bvec;
int i;
int ret;
@@ -1396,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio)
buf_len = page->private >> 2;
start = page_offset(page) + bvec->bv_offset;
io_tree = &BTRFS_I(page->mapping->host)->io_tree;
- info = BTRFS_I(page->mapping->host)->root->fs_info;
}
/* are we fully contained in this bio? */
if (buf_len <= length)
@@ -1680,12 +1679,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
+ init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
-
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh)
goto fail_iput;
@@ -1775,6 +1774,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
+ 1, &fs_info->generic_worker);
/*
* endios are largely parallel and should have a very
@@ -1795,6 +1796,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1);
+ btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +1995,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
btrfs_orphan_cleanup(fs_info->fs_root);
+ btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
}
@@ -2035,6 +2038,7 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2414,7 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
+ btrfs_put_block_group_cache(fs_info);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
@@ -2456,6 +2461,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101c..0c097f3aec41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
return NULL;
}
+ /* We're loading it the fast way, so we don't have a caching_ctl. */
+ if (!cache->caching_ctl) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
return 0;
}
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+ struct btrfs_trans_handle *trans,
+ int load_cache_only)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
if (cache->cached != BTRFS_CACHE_NO)
return 0;
+ /*
+ * We can't do the read from on-disk cache during a commit since we need
+ * to have the normal tree locking.
+ */
+ if (!trans->transaction->in_commit) {
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_NO) {
+ spin_unlock(&cache->lock);
+ return 0;
+ }
+ cache->cached = BTRFS_CACHE_STARTED;
+ spin_unlock(&cache->lock);
+
+ ret = load_free_space_cache(fs_info, cache);
+
+ spin_lock(&cache->lock);
+ if (ret == 1) {
+ cache->cached = BTRFS_CACHE_FINISHED;
+ cache->last_byte_to_unpin = (u64)-1;
+ } else {
+ cache->cached = BTRFS_CACHE_NO;
+ }
+ spin_unlock(&cache->lock);
+ if (ret == 1)
+ return 0;
+ }
+
+ if (load_cache_only)
+ return 0;
+
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
BUG_ON(!caching_ctl);
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
- if (found->flags == flags) {
+ if (found->flags & flags) {
rcu_read_unlock();
return found;
}
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
return num;
}
+/*
+ * Return factor percent of num, i.e. num * factor / 100.  A factor of
+ * exactly 100 returns num unchanged without doing the multiply and divide.
+ */
+static u64 div_factor_fine(u64 num, int factor)
+{
+ if (factor == 100)
+ return num;
+ num *= factor;
+ /* do_div() leaves the quotient in num */
+ do_div(num, 100);
+ return num;
+}
+
u64 btrfs_find_block_group(struct btrfs_root *root,
u64 search_start, u64 search_hint, int owner)
{
@@ -2687,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
return cache;
}
+/*
+ * Get the free space cache inode of block_group ready to be written out.
+ *
+ * The inode is looked up in the tree root (and created on -ENOENT unless the
+ * block group is read-only), its generation is zeroed, any old contents are
+ * truncated and, if the block group is fully cached, space for the new cache
+ * is preallocated.
+ *
+ * On exit disk_cache_state is BTRFS_DC_WRITTEN for block groups too small to
+ * bother with, BTRFS_DC_SETUP when ret is 0 and BTRFS_DC_ERROR otherwise.
+ * Returns 0 or the error handed back by the helper that failed.
+ */
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path)
+{
+ struct btrfs_root *root = block_group->fs_info->tree_root;
+ struct inode *inode = NULL;
+ u64 alloc_hint = 0;
+ int num_pages = 0;
+ int retries = 0;
+ int ret = 0;
+
+ /*
+ * If this block group is smaller than 100 megs don't bother caching the
+ * block group.
+ */
+ if (block_group->key.offset < (100 * 1024 * 1024)) {
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+
+again:
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+ ret = PTR_ERR(inode);
+ btrfs_release_path(root, path);
+ goto out;
+ }
+
+ /* -ENOENT: no cache inode yet, create one and look it up again */
+ if (IS_ERR(inode)) {
+ /* we only ever expect to come through here once */
+ BUG_ON(retries);
+ retries++;
+
+ if (block_group->ro)
+ goto out_free;
+
+ ret = create_free_space_inode(root, trans, block_group, path);
+ if (ret)
+ goto out_free;
+ goto again;
+ }
+
+ /*
+ * We want to set the generation to 0, that way if anything goes wrong
+ * from here on out we know not to trust this cache when we load up next
+ * time.
+ */
+ BTRFS_I(inode)->generation = 0;
+ ret = btrfs_update_inode(trans, root, inode);
+ WARN_ON(ret);
+
+ if (i_size_read(inode) > 0) {
+ ret = btrfs_truncate_free_space_cache(root, trans, path,
+ inode);
+ if (ret)
+ goto out_put;
+ }
+
+ /* only preallocate for block groups whose caching has finished */
+ spin_lock(&block_group->lock);
+ if (block_group->cached != BTRFS_CACHE_FINISHED) {
+ spin_unlock(&block_group->lock);
+ goto out_put;
+ }
+ spin_unlock(&block_group->lock);
+
+ /* one unit per gigabyte of block group, but never less than one */
+ num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+ if (!num_pages)
+ num_pages = 1;
+
+ /*
+ * Just to make absolutely sure we have enough space, we're going to
+ * preallocate 16 pages worth of space per gigabyte of block group. In
+ * practice we ought to use at most 8, but we need extra space so we can
+ * add our header and have a terminator between the extents and the
+ * bitmaps.
+ */
+ num_pages *= 16;
+ /* from here on num_pages is a byte count, not a page count */
+ num_pages *= PAGE_CACHE_SIZE;
+
+ ret = btrfs_check_data_free_space(inode, num_pages);
+ if (ret)
+ goto out_put;
+
+ ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+ num_pages, num_pages,
+ &alloc_hint);
+ btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+ iput(inode);
+out_free:
+ btrfs_release_path(root, path);
+out:
+ spin_lock(&block_group->lock);
+ if (ret)
+ block_group->disk_cache_state = BTRFS_DC_ERROR;
+ else
+ block_group->disk_cache_state = BTRFS_DC_SETUP;
+ spin_unlock(&block_group->lock);
+
+ return ret;
+}
+
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -2699,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
+again:
+ while (1) {
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ while (cache) {
+ if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+ break;
+ cache = next_block_group(root, cache);
+ }
+ if (!cache) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+ err = cache_save_setup(cache, trans, path);
+ last = cache->key.objectid + cache->key.offset;
+ btrfs_put_block_group(cache);
+ }
+
while (1) {
if (last == 0) {
err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_first_block_group(root->fs_info, last);
while (cache) {
+ if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+ btrfs_put_block_group(cache);
+ goto again;
+ }
+
if (cache->dirty)
break;
cache = next_block_group(root, cache);
@@ -2719,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
continue;
}
+ if (cache->disk_cache_state == BTRFS_DC_SETUP)
+ cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
cache->dirty = 0;
last = cache->key.objectid + cache->key.offset;
@@ -2727,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
btrfs_put_block_group(cache);
}
+ while (1) {
+ /*
+ * I don't think this is needed since we're just marking our
+ * preallocated extent as written, but just in case it can't
+ * hurt.
+ */
+ if (last == 0) {
+ err = btrfs_run_delayed_refs(trans, root,
+ (unsigned long)-1);
+ BUG_ON(err);
+ }
+
+ cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ while (cache) {
+ /*
+ * Really this shouldn't happen, but it could if we
+ * couldn't write the entire preallocated extent and
+ * splitting the extent resulted in a new block.
+ */
+ if (cache->dirty) {
+ btrfs_put_block_group(cache);
+ goto again;
+ }
+ if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ break;
+ cache = next_block_group(root, cache);
+ }
+ if (!cache) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+
+ btrfs_write_out_cache(root, trans, cache, path);
+
+ /*
+ * If we didn't have an error then the cache state is still
+ * NEED_WRITE, so we can set it to WRITTEN.
+ */
+ if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ cache->disk_cache_state = BTRFS_DC_WRITTEN;
+ last = cache->key.objectid + cache->key.offset;
+ btrfs_put_block_group(cache);
+ }
+
btrfs_free_path(path);
return 0;
}
@@ -2762,6 +2984,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (found) {
spin_lock(&found->lock);
found->total_bytes += total_bytes;
+ found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
found->full = 0;
@@ -2781,6 +3004,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA);
found->total_bytes = total_bytes;
+ found->disk_total = total_bytes * factor;
found->bytes_used = bytes_used;
found->disk_used = bytes_used * factor;
found->bytes_pinned = 0;
@@ -2882,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 used;
- int ret = 0, committed = 0;
+ int ret = 0, committed = 0, alloc_chunk = 1;
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+ if (root == root->fs_info->tree_root) {
+ alloc_chunk = 0;
+ committed = 1;
+ }
+
data_sinfo = BTRFS_I(inode)->space_info;
if (!data_sinfo)
goto alloc;
@@ -2905,7 +3134,7 @@ again:
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
*/
- if (!data_sinfo->full) {
+ if (!data_sinfo->full && alloc_chunk) {
u64 alloc_target;
data_sinfo->force_alloc = 1;
@@ -2997,10 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_unlock();
}
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
- u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+ struct btrfs_space_info *sinfo, u64 alloc_bytes)
{
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+ u64 thresh;
if (sinfo->bytes_used + sinfo->bytes_reserved +
alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3240,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
alloc_bytes < div_factor(num_bytes, 8))
return 0;
+ thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+ if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+ return 0;
+
return 1;
}
@@ -3041,13 +3277,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
goto out;
}
- if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+ if (!force && !should_alloc_chunk(extent_root, space_info,
+ alloc_bytes)) {
spin_unlock(&space_info->lock);
goto out;
}
spin_unlock(&space_info->lock);
/*
+ * If we have mixed data/metadata chunks we want to make sure we keep
+ * allocating mixed chunks instead of individual chunks.
+ */
+ if (btrfs_mixed_space_info(space_info))
+ flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+ /*
* if we're doing a data chunk, go ahead and make sure that
* we keep a reasonable number of metadata chunks allocated in the
* FS as well.
@@ -3072,55 +3316,25 @@ out:
return ret;
}
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 num_bytes)
-{
- int ret;
- int end_trans = 0;
-
- if (sinfo->full)
- return 0;
-
- spin_lock(&sinfo->lock);
- ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
- spin_unlock(&sinfo->lock);
- if (!ret)
- return 0;
-
- if (!trans) {
- trans = btrfs_join_transaction(root, 1);
- BUG_ON(IS_ERR(trans));
- end_trans = 1;
- }
-
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes + 2 * 1024 * 1024,
- get_alloc_profile(root, sinfo->flags), 0);
-
- if (end_trans)
- btrfs_end_transaction(trans, root);
-
- return ret == 1 ? 1 : 0;
-}
-
/*
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim)
+ struct btrfs_root *root, u64 to_reclaim, int sync)
{
struct btrfs_block_rsv *block_rsv;
+ struct btrfs_space_info *space_info;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
int pause = 1;
- int ret;
+ int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
block_rsv = &root->fs_info->delalloc_block_rsv;
- spin_lock(&block_rsv->lock);
- reserved = block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
+ space_info = block_rsv->space_info;
+
+ smp_mb();
+ reserved = space_info->bytes_reserved;
if (reserved == 0)
return 0;
@@ -3128,104 +3342,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
max_reclaim = min(reserved, to_reclaim);
while (1) {
- ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
- if (!ret) {
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(pause);
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
- } else {
- pause = 1;
- }
+ /* have the flusher threads jump in and do some IO */
+ smp_mb();
+ nr_pages = min_t(unsigned long, nr_pages,
+ root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+ writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
- spin_lock(&block_rsv->lock);
- if (reserved > block_rsv->reserved)
- reclaimed = reserved - block_rsv->reserved;
- reserved = block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
+ spin_lock(&space_info->lock);
+ if (reserved > space_info->bytes_reserved)
+ reclaimed += reserved - space_info->bytes_reserved;
+ reserved = space_info->bytes_reserved;
+ spin_unlock(&space_info->lock);
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
+
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(pause);
+ pause <<= 1;
+ if (pause > HZ / 10)
+ pause = HZ / 10;
+
}
return reclaimed >= to_reclaim;
}
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries)
+/*
+ * Try to reserve orig_bytes of metadata space in block_rsv's space_info.  If
+ * the space isn't available on the first pass we still add orig_bytes to
+ * bytes_reserved to hold our place while we go and try to free up space.  On
+ * later passes (counted by the local 'retries') we don't add it again, we
+ * just check that the space_info is no longer overcommitted, meaning that our
+ * reservation is valid.
+ *
+ * If flush is 0 we don't try to reclaim anything: the placeholder is dropped
+ * and -ENOSPC is returned.  Every error return drops the placeholder.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 orig_bytes, int flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
- int ret;
+ u64 unused;
+ u64 num_bytes = orig_bytes;
+ int retries = 0;
+ int ret = 0;
+ bool reserved = false;
+ bool committed = false;
- if ((*retries) > 2)
- return -ENOSPC;
+again:
+ ret = -ENOSPC;
+ if (reserved)
+ num_bytes = 0;
- ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
- if (ret)
- return 1;
+ spin_lock(&space_info->lock);
+ unused = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
- if (trans && trans->transaction->in_commit)
- return -ENOSPC;
+ /*
+ * 'unused' holds the bytes already spoken for at this point.  If we've
+ * not already over-reserved the space_info, turn it into the amount
+ * still free and take our reservation if it fits, flushing only if it
+ * doesn't.  Otherwise, if we've already overcommitted, start flushing
+ * stuff first and then come back and try to make our reservation.
+ */
+ if (unused <= space_info->total_bytes) {
+ unused = space_info->total_bytes - unused;
+ if (unused >= num_bytes) {
+ if (!reserved)
+ space_info->bytes_reserved += orig_bytes;
+ ret = 0;
+ } else {
+ /*
+ * Ok set num_bytes to orig_bytes since we aren't
+ * overcommitted, this way we only try and reclaim what
+ * we need.
+ */
+ num_bytes = orig_bytes;
+ }
+ } else {
+ /*
+ * Ok we're over committed, set num_bytes to the overcommitted
+ * amount plus the amount of bytes that we need for this
+ * reservation.
+ */
+ num_bytes = unused - space_info->total_bytes +
+ (orig_bytes * (retries + 1));
+ }
- ret = shrink_delalloc(trans, root, num_bytes);
- if (ret)
- return ret;
+ /*
+ * Couldn't make our reservation, save our place so while we're trying
+ * to reclaim space we can actually use it instead of somebody else
+ * stealing it from us.
+ */
+ if (ret && !reserved) {
+ space_info->bytes_reserved += orig_bytes;
+ reserved = true;
+ }
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned < num_bytes)
- ret = 1;
spin_unlock(&space_info->lock);
- if (ret)
- return -ENOSPC;
-
- (*retries)++;
- if (trans)
- return -EAGAIN;
+ if (!ret)
+ return 0;
- trans = btrfs_join_transaction(root, 1);
- BUG_ON(IS_ERR(trans));
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
+ if (!flush)
+ goto out;
- return 1;
-}
+ /*
+ * We do synchronous shrinking since we don't actually unreserve
+ * metadata until after the IO is completed.
+ */
+ ret = shrink_delalloc(trans, root, num_bytes, 1);
+ if (ret > 0)
+ return 0;
+ else if (ret < 0)
+ goto out;
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- struct btrfs_space_info *space_info = block_rsv->space_info;
- u64 unused;
- int ret = -ENOSPC;
+ /*
+ * So if we were overcommitted it's possible that somebody else flushed
+ * out enough space and we simply didn't have enough space to reclaim,
+ * so go back around and try again.
+ */
+ if (retries < 2) {
+ retries++;
+ goto again;
+ }
spin_lock(&space_info->lock);
- unused = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly;
+ /*
+ * Not enough space to be reclaimed, don't bother committing the
+ * transaction.
+ */
+ if (space_info->bytes_pinned < orig_bytes)
+ ret = -ENOSPC;
+ spin_unlock(&space_info->lock);
+ if (ret)
+ goto out;
- if (unused < space_info->total_bytes)
- unused = space_info->total_bytes - unused;
- else
- unused = 0;
+ ret = -EAGAIN;
+ if (trans || committed)
+ goto out;
- if (unused >= num_bytes) {
- if (block_rsv->priority >= 10) {
- space_info->bytes_reserved += num_bytes;
- ret = 0;
- } else {
- if ((unused + block_rsv->reserved) *
- block_rsv->priority >=
- (num_bytes + block_rsv->reserved) * 10) {
- space_info->bytes_reserved += num_bytes;
- ret = 0;
- }
- }
+ ret = -ENOSPC;
+ trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ goto out;
+ ret = btrfs_commit_transaction(trans, root);
+ if (!ret) {
+ trans = NULL;
+ committed = true;
+ goto again;
+ }
+
+out:
+ if (reserved) {
+ spin_lock(&space_info->lock);
+ space_info->bytes_reserved -= orig_bytes;
+ spin_unlock(&space_info->lock);
}
- spin_unlock(&space_info->lock);
return ret;
}
@@ -3327,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_fs_info *fs_info = root->fs_info;
- u64 alloc_target;
block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
if (!block_rsv)
return NULL;
btrfs_init_block_rsv(block_rsv);
-
- alloc_target = btrfs_get_alloc_profile(root, 0);
block_rsv->space_info = __find_space_info(fs_info,
BTRFS_BLOCK_GROUP_METADATA);
-
return block_rsv;
}
@@ -3369,23 +3644,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int *retries)
+ u64 num_bytes)
{
int ret;
if (num_bytes == 0)
return 0;
-again:
- ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+ ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
}
- ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
- if (ret > 0)
- goto again;
-
return ret;
}
@@ -3420,7 +3691,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
return 0;
if (block_rsv->refill_used) {
- ret = reserve_metadata_bytes(block_rsv, num_bytes);
+ ret = reserve_metadata_bytes(trans, root, block_rsv,
+ num_bytes, 0);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
@@ -3499,6 +3771,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&sinfo->lock);
+ if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+ data_used = 0;
meta_used = sinfo->bytes_used;
spin_unlock(&sinfo->lock);
@@ -3526,7 +3800,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->size = num_bytes;
num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly;
+ sinfo->bytes_reserved + sinfo->bytes_readonly +
+ sinfo->bytes_may_use;
if (sinfo->total_bytes > num_bytes) {
num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3872,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- int num_items, int *retries)
+ int num_items)
{
u64 num_bytes;
int ret;
@@ -3607,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
- num_bytes, retries);
+ num_bytes);
if (!ret) {
trans->bytes_reserved += num_bytes;
trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3956,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve;
int nr_extents;
- int retries = 0;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
spin_lock(&BTRFS_I(inode)->accounting_lock);
nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3972,14 @@ again:
nr_extents = 0;
to_reserve = 0;
}
+ spin_unlock(&BTRFS_I(inode)->accounting_lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
- ret = reserve_metadata_bytes(block_rsv, to_reserve);
- if (ret) {
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
- &retries);
- if (ret > 0)
- goto again;
+ ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+ if (ret)
return ret;
- }
+ spin_lock(&BTRFS_I(inode)->accounting_lock);
BTRFS_I(inode)->reserved_extents += nr_extents;
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3987,7 @@ again:
block_rsv_add_bytes(block_rsv, to_reserve, 1);
if (block_rsv->size > 512 * 1024 * 1024)
- shrink_delalloc(NULL, root, to_reserve);
+ shrink_delalloc(NULL, root, to_reserve, 0);
return 0;
}
@@ -3776,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group_cache *cache = NULL;
struct btrfs_fs_info *info = root->fs_info;
- int factor;
u64 total = num_bytes;
u64 old_val;
u64 byte_in_group;
+ int factor;
/* block accounting for super block */
spin_lock(&info->delalloc_lock);
@@ -3803,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
factor = 2;
else
factor = 1;
+ /*
+ * If this block group has free space cache written out, we
+ * need to make sure to load it if we are removing space. This
+ * is because we need the unpinning stage to actually add the
+ * space back to the block group, otherwise we will leak space.
+ */
+ if (!alloc && cache->cached == BTRFS_CACHE_NO)
+ cache_block_group(cache, trans, 1);
+
byte_in_group = bytenr - cache->key.objectid;
WARN_ON(byte_in_group > cache->key.offset);
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
+
+ if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+ cache->disk_cache_state < BTRFS_DC_CLEAR)
+ cache->disk_cache_state = BTRFS_DC_CLEAR;
+
cache->dirty = 1;
old_val = btrfs_block_group_used(&cache->item);
num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
bool found_uncached_bg = false;
bool failed_cluster_refill = false;
bool failed_alloc = false;
+ bool use_cluster = true;
u64 ideal_cache_percent = 0;
u64 ideal_cache_offset = 0;
@@ -4568,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
return -ENOSPC;
}
+ /*
+ * If the space info is for both data and metadata it means we have a
+ * small filesystem and we can't use the clustering stuff.
+ */
+ if (btrfs_mixed_space_info(space_info))
+ use_cluster = false;
+
if (orig_root->ref_cows || empty_size)
allowed_chunk_alloc = 1;
- if (data & BTRFS_BLOCK_GROUP_METADATA) {
+ if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
last_ptr = &root->fs_info->meta_alloc_cluster;
if (!btrfs_test_opt(root, SSD))
empty_cluster = 64 * 1024;
}
- if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+ if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+ btrfs_test_opt(root, SSD)) {
last_ptr = &root->fs_info->data_alloc_cluster;
}
@@ -4641,6 +4934,10 @@ have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
u64 free_percent;
+ ret = cache_block_group(block_group, trans, 1);
+ if (block_group->cached == BTRFS_CACHE_FINISHED)
+ goto have_block_group;
+
free_percent = btrfs_block_group_used(&block_group->item);
free_percent *= 100;
free_percent = div64_u64(free_percent,
@@ -4661,7 +4958,7 @@ have_block_group:
if (loop > LOOP_CACHING_NOWAIT ||
(loop > LOOP_FIND_IDEAL &&
atomic_read(&space_info->caching_threads) < 2)) {
- ret = cache_block_group(block_group);
+ ret = cache_block_group(block_group, trans, 0);
BUG_ON(ret);
}
found_uncached_bg = true;
@@ -5218,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
u64 num_bytes = ins->offset;
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
- cache_block_group(block_group);
+ cache_block_group(block_group, trans, 0);
caching_ctl = get_caching_control(block_group);
if (!caching_ctl) {
@@ -5308,7 +5605,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
if (block_rsv->size == 0) {
- ret = reserve_metadata_bytes(block_rsv, blocksize);
+ ret = reserve_metadata_bytes(trans, root, block_rsv,
+ blocksize, 0);
if (ret)
return ERR_PTR(ret);
return block_rsv;
@@ -5318,11 +5616,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
if (!ret)
return block_rsv;
- WARN_ON(1);
- printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
- block_rsv->size, block_rsv->reserved,
- block_rsv->freed[0], block_rsv->freed[1]);
-
return ERR_PTR(-ENOSPC);
}
@@ -5421,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
u64 generation;
u64 refs;
u64 flags;
- u64 last = 0;
u32 nritems;
u32 blocksize;
struct btrfs_key key;
@@ -5489,7 +5781,6 @@ reada:
generation);
if (ret)
break;
- last = bytenr + blocksize;
nread++;
}
wc->reada_slot = slot;
@@ -7813,6 +8104,40 @@ out:
return ret;
}
+/*
+ * Drop the inode references held by the block groups of info.
+ *
+ * Block groups are walked in order starting at 'last'.  For each one that
+ * has iref set, iref and the inode pointer are cleared under
+ * block_group->lock and the inode is then released with iput() after the
+ * lock has been dropped.  When a scan that did not start at 0 finds nothing
+ * more, one further scan from 0 is made; a scan from 0 that finds nothing
+ * ends the loop.
+ */
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+ struct btrfs_block_group_cache *block_group;
+ u64 last = 0;
+
+ while (1) {
+ struct inode *inode;
+
+ block_group = btrfs_lookup_first_block_group(info, last);
+ while (block_group) {
+ spin_lock(&block_group->lock);
+ /* found one: leave the loop with the lock still held */
+ if (block_group->iref)
+ break;
+ spin_unlock(&block_group->lock);
+ block_group = next_block_group(info->tree_root,
+ block_group);
+ }
+ if (!block_group) {
+ if (last == 0)
+ break;
+ last = 0;
+ continue;
+ }
+
+ /* block_group->lock is held here, taken in the search loop above */
+ inode = block_group->inode;
+ block_group->iref = 0;
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ iput(inode);
+ last = block_group->key.objectid + block_group->key.offset;
+ btrfs_put_block_group(block_group);
+ }
+}
+
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
struct btrfs_key key;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ int need_clear = 0;
+ u64 cache_gen;
root = info->extent_root;
key.objectid = 0;
@@ -7905,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
if (!path)
return -ENOMEM;
+ cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+ if (cache_gen != 0 &&
+ btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+ need_clear = 1;
+ if (btrfs_test_opt(root, CLEAR_CACHE))
+ need_clear = 1;
+ if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+ printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+
while (1) {
ret = find_first_block_group(root, path, &key);
if (ret > 0)
@@ -7927,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
+ if (need_clear)
+ cache->disk_cache_state = BTRFS_DC_CLEAR;
+
/*
* we only want to have 32k of ram per block group for keeping
* track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->key.offset = size;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
cache->sectorsize = root->sectorsize;
+ cache->fs_info = root->fs_info;
/*
* we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8427,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_block_group_cache *block_group;
struct btrfs_free_cluster *cluster;
+ struct btrfs_root *tree_root = root->fs_info->tree_root;
struct btrfs_key key;
+ struct inode *inode;
int ret;
+ int factor;
root = root->fs_info->extent_root;
@@ -8097,6 +8440,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
BUG_ON(!block_group->ro);
memcpy(&key, &block_group->key, sizeof(key));
+ if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ factor = 2;
+ else
+ factor = 1;
/* make sure this block group isn't part of an allocation cluster */
cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (!IS_ERR(inode)) {
+ btrfs_orphan_add(trans, inode);
+ clear_nlink(inode);
+ /* One for the block groups ref */
+ spin_lock(&block_group->lock);
+ if (block_group->iref) {
+ block_group->iref = 0;
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ iput(inode);
+ } else {
+ spin_unlock(&block_group->lock);
+ }
+ /* One for our lookup ref */
+ iput(inode);
+ }
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+ if (ret > 0)
+ btrfs_release_path(tree_root, path);
+ if (ret == 0) {
+ ret = btrfs_del_item(trans, tree_root, path);
+ if (ret)
+ goto out;
+ btrfs_release_path(tree_root, path);
+ }
+
spin_lock(&root->fs_info->block_group_cache_lock);
rb_erase(&block_group->cache_node,
&root->fs_info->block_group_cache_tree);
@@ -8137,8 +8520,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_lock(&block_group->space_info->lock);
block_group->space_info->total_bytes -= block_group->key.offset;
block_group->space_info->bytes_readonly -= block_group->key.offset;
+ block_group->space_info->disk_total -= block_group->key.offset * factor;
spin_unlock(&block_group->space_info->lock);
+ memcpy(&key, &block_group->key, sizeof(key));
+
btrfs_clear_space_info_full(root->fs_info);
btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53a..eac10e3260a9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping, gfp_t mask)
{
tree->state = RB_ROOT;
- tree->buffer = RB_ROOT;
+ INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
return ret;
}
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
- u64 offset, struct rb_node *node)
-{
- struct rb_root *root = &tree->buffer;
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct extent_buffer *eb;
-
- while (*p) {
- parent = *p;
- eb = rb_entry(parent, struct extent_buffer, rb_node);
-
- if (offset < eb->start)
- p = &(*p)->rb_left;
- else if (offset > eb->start)
- p = &(*p)->rb_right;
- else
- return eb;
- }
-
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
- u64 offset)
-{
- struct rb_root *root = &tree->buffer;
- struct rb_node *n = root->rb_node;
- struct extent_buffer *eb;
-
- while (n) {
- eb = rb_entry(n, struct extent_buffer, rb_node);
- if (offset < eb->start)
- n = n->rb_left;
- else if (offset > eb->start)
- n = n->rb_right;
- else
- return eb;
- }
- return NULL;
-}
-
static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
struct extent_state *other)
{
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
u64 start;
- u64 end;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
bio->bi_private = NULL;
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
- u64 unlock_start;
sector_t sector;
struct extent_state *cached_state = NULL;
struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1);
- unlock_start = page_end + 1;
goto done;
}
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
page_end, NULL, 1);
- unlock_start = page_end + 1;
break;
}
em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
cur += iosize;
pg_offset += iosize;
- unlock_start = cur;
continue;
}
/* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
- int range_whole = 0;
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
} else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
- range_whole = 1;
scanned = 1;
}
retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
NULL, 1,
end_bio_extent_preparewrite, 0,
0, 0);
+ if (ret && !err)
+ err = ret;
iocount++;
block_start = block_start + iosize;
} else {
@@ -3104,6 +3053,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
+/*
+ * Helper for releasing extent buffer pages.
+ *
+ * Drops one page cache reference on each page of @eb whose index lies in
+ * [@start_idx, num_extent_pages(eb->start, eb->len)), walking from the
+ * last page down to @start_idx. Does nothing when @eb has no first page
+ * or when @start_idx is at or past the page count.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+ unsigned long start_idx)
+{
+ unsigned long index;
+ struct page *page;
+
+ if (!eb->first_page)
+ return;
+
+ index = num_extent_pages(eb->start, eb->len);
+ if (start_idx >= index)
+ return;
+
+ /* index > start_idx here, so the pre-decrement cannot wrap */
+ do {
+ index--;
+ page = extent_buffer_page(eb, index);
+ /* indexes for which no page is returned are skipped */
+ if (page)
+ page_cache_release(page);
+ } while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ *
+ * Releases the pages of @eb starting at index 0 and then frees @eb itself
+ * with __free_extent_buffer(); @eb must not be used afterwards.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+ btrfs_release_extent_buffer_page(eb, 0);
+ __free_extent_buffer(eb);
+}
+
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len,
struct page *page0,
@@ -3117,16 +3099,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
struct page *p;
struct address_space *mapping = tree->mapping;
int uptodate = 1;
+ int ret;
- spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
- if (eb) {
- atomic_inc(&eb->refs);
- spin_unlock(&tree->buffer_lock);
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
mark_page_accessed(eb->first_page);
return eb;
}
- spin_unlock(&tree->buffer_lock);
+ rcu_read_unlock();
eb = __alloc_extent_buffer(tree, start, len, mask);
if (!eb)
@@ -3165,26 +3147,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+ if (ret)
+ goto free_eb;
+
spin_lock(&tree->buffer_lock);
- exists = buffer_tree_insert(tree, start, &eb->rb_node);
- if (exists) {
+ ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+ if (ret == -EEXIST) {
+ exists = radix_tree_lookup(&tree->buffer,
+ start >> PAGE_CACHE_SHIFT);
/* add one reference for the caller */
atomic_inc(&exists->refs);
spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
goto free_eb;
}
/* add one reference for the tree */
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
return eb;
free_eb:
if (!atomic_dec_and_test(&eb->refs))
return exists;
- for (index = 1; index < i; index++)
- page_cache_release(extent_buffer_page(eb, index));
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
+ btrfs_release_extent_buffer(eb);
return exists;
}
@@ -3194,16 +3181,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
{
struct extent_buffer *eb;
- spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
- if (eb)
- atomic_inc(&eb->refs);
- spin_unlock(&tree->buffer_lock);
-
- if (eb)
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
mark_page_accessed(eb->first_page);
+ return eb;
+ }
+ rcu_read_unlock();
- return eb;
+ return NULL;
}
void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3820,45 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
+/*
+ * RCU callback: map @head back to the extent buffer that embeds it as
+ * rcu_head and release that buffer (pages and the structure itself).
+ */
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+ struct extent_buffer *eb =
+ container_of(head, struct extent_buffer, rcu_head);
+
+ btrfs_release_extent_buffer(eb);
+}
+
+/*
+ * Try to drop the extent buffer that @tree caches for @page.
+ *
+ * Returns 1 when no buffer is cached for the page or when the buffer was
+ * removed from the tree and queued for release; returns 0 when the buffer
+ * is dirty or its reference count is not exactly 1.
+ */
int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
{
u64 start = page_offset(page);
struct extent_buffer *eb;
int ret = 1;
- unsigned long i;
- unsigned long num_pages;
spin_lock(&tree->buffer_lock);
- eb = buffer_search(tree, start);
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
if (!eb)
goto out;
- if (atomic_read(&eb->refs) > 1) {
+ if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
ret = 0;
goto out;
}
- if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+
+ /*
+ * Set @eb->refs to 0 only if it is exactly 1, so that lookups using
+ * atomic_inc_not_zero() can no longer take a reference; otherwise
+ * leave the buffer alone and report failure.
+ */
+ if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
ret = 0;
goto out;
}
- /* at this point we can safely release the extent buffer */
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++)
- page_cache_release(extent_buffer_page(eb, i));
- rb_erase(&eb->rb_node, &tree->buffer);
- __free_extent_buffer(eb);
+
+ radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
out:
spin_unlock(&tree->buffer_lock);
+
+ /*
+ * The lookup-miss path also lands here with @eb == NULL, so @eb must
+ * be checked before its refcount is read. With a zero refcount the
+ * buffer is released after an RCU grace period.
+ */
+ if (eb && atomic_read(&eb->refs) == 0)
+ call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
return ret;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b590da..1c6d4f342ef7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
struct extent_io_tree {
struct rb_root state;
- struct rb_root buffer;
+ struct radix_tree_root buffer;
struct address_space *mapping;
u64 dirty_bytes;
spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
unsigned long bflags;
atomic_t refs;
struct list_head leak_list;
- struct rb_node rb_node;
+ struct rcu_head rcu_head;
/* the spinlock is used to protect most operations */
spinlock_t lock;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 454ca52d6451..23cb8da3ff66 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
goto out;
}
if (IS_ERR(rb_node)) {
- em = ERR_PTR(PTR_ERR(rb_node));
+ em = ERR_CAST(rb_node);
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
goto out;
}
if (IS_ERR(rb_node)) {
- em = ERR_PTR(PTR_ERR(rb_node));
+ em = ERR_CAST(rb_node);
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac04d99..22ee0dc2e6b8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,761 @@
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
+#include "disk-io.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
+static void recalculate_thresholds(struct btrfs_block_group_cache
+ *block_group);
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info);
+
+/*
+ * Return the inode that backs the free space cache of @block_group.
+ *
+ * If the block group already has an inode attached, a reference obtained
+ * with igrab() is returned. Otherwise the free space header item keyed by
+ * (BTRFS_FREE_SPACE_OBJECTID, 0, block_group->key.objectid) is looked up
+ * in @root using @path, and the inode named by the key stored in that
+ * header is loaded with btrfs_iget().
+ *
+ * Returns the inode on success, ERR_PTR(-ENOENT) when the header item or
+ * a usable inode is missing, or the ERR_PTR from the failing search or
+ * btrfs_iget() call.
+ */
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+ struct btrfs_block_group_cache
+ *block_group, struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_key location;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct inode *inode = NULL;
+ int ret;
+
+ /* fast path: the inode is already attached to the block group */
+ spin_lock(&block_group->lock);
+ if (block_group->inode)
+ inode = igrab(block_group->inode);
+ spin_unlock(&block_group->lock);
+ if (inode)
+ return inode;
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ /* NOTE(review): @path is not released here on a negative return */
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0) {
+ btrfs_release_path(root, path);
+ return ERR_PTR(-ENOENT);
+ }
+
+ /* the header item stores the key of the cache inode */
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ btrfs_free_space_key(leaf, header, &disk_key);
+ btrfs_disk_key_to_cpu(&location, &disk_key);
+ btrfs_release_path(root, path);
+
+ inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
+ if (IS_ERR(inode))
+ return inode;
+ if (is_bad_inode(inode)) {
+ iput(inode);
+ return ERR_PTR(-ENOENT);
+ }
+
+ /*
+ * Attach the inode to the block group with its own igrab() reference,
+ * flagged through iref, unless the filesystem is closing.
+ */
+ spin_lock(&block_group->lock);
+ if (!root->fs_info->closing) {
+ block_group->inode = igrab(inode);
+ block_group->iref = 1;
+ }
+ spin_unlock(&block_group->lock);
+
+ return inode;
+}
+
+/*
+ * Create the on-disk items for the free space cache of @block_group.
+ *
+ * Inserts an empty inode item under a newly found objectid in @root and
+ * fills it in, then inserts a free space header item keyed by
+ * (BTRFS_FREE_SPACE_OBJECTID, 0, block_group->key.objectid) that stores
+ * the key of that inode item.
+ *
+ * Returns 0 on success, or the error returned by the step that failed.
+ */
+int create_free_space_inode(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_disk_key disk_key;
+ struct btrfs_free_space_header *header;
+ struct btrfs_inode_item *inode_item;
+ struct extent_buffer *leaf;
+ u64 objectid;
+ int ret;
+
+ ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
+ if (ret < 0)
+ return ret;
+
+ ret = btrfs_insert_empty_inode(trans, root, path, objectid);
+ if (ret)
+ return ret;
+
+ leaf = path->nodes[0];
+ inode_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_inode_item);
+ /* remember the inode item's key; it is stored in the header below */
+ btrfs_item_key(leaf, &disk_key, path->slots[0]);
+ memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
+ sizeof(*inode_item));
+ /* empty regular file, mode 0600, uid/gid 0, single link */
+ btrfs_set_inode_generation(leaf, inode_item, trans->transid);
+ btrfs_set_inode_size(leaf, inode_item, 0);
+ btrfs_set_inode_nbytes(leaf, inode_item, 0);
+ btrfs_set_inode_uid(leaf, inode_item, 0);
+ btrfs_set_inode_gid(leaf, inode_item, 0);
+ btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
+ btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
+ BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+ btrfs_set_inode_nlink(leaf, inode_item, 1);
+ btrfs_set_inode_transid(leaf, inode_item, trans->transid);
+ btrfs_set_inode_block_group(leaf, inode_item,
+ block_group->key.objectid);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(struct btrfs_free_space_header));
+ if (ret < 0) {
+ btrfs_release_path(root, path);
+ return ret;
+ }
+ /* zeroed header whose only populated field is the inode's key */
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
+ btrfs_set_free_space_key(leaf, header, &disk_key);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ return 0;
+}
+
+/*
+ * Truncate the free space cache @inode to zero length.
+ *
+ * Switches trans->block_rsv to root->orphan_block_rsv, checks that
+ * reservation, sets the inode size to 0, truncates the page cache, drops
+ * the inode's items down to offset 0 and writes the inode back.
+ *
+ * Returns 0 on success or the error of the step that failed. @path is
+ * not used by this function.
+ */
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct inode *inode)
+{
+ loff_t oldsize;
+ int ret = 0;
+
+ /* NOTE(review): trans->block_rsv is not restored before returning */
+ trans->block_rsv = root->orphan_block_rsv;
+ ret = btrfs_block_rsv_check(trans, root,
+ root->orphan_block_rsv,
+ 0, 5);
+ if (ret)
+ return ret;
+
+ oldsize = i_size_read(inode);
+ btrfs_i_size_write(inode, 0);
+ truncate_pagecache(inode, oldsize, 0);
+
+ /*
+ * We don't need an orphan item because truncating the free space cache
+ * will never be split across transactions.
+ */
+ ret = btrfs_truncate_inode_items(trans, root, inode,
+ 0, BTRFS_EXTENT_DATA_KEY);
+ if (ret) {
+ WARN_ON(1);
+ return ret;
+ }
+
+ return btrfs_update_inode(trans, root, inode);
+}
+
+/*
+ * Kick off readahead on the free space cache @inode.
+ *
+ * Uses a temporary file_ra_state and issues one synchronous readahead
+ * request starting at page 0, passing the index of the inode's last page
+ * as the size argument.
+ *
+ * Returns 0, or -ENOMEM when the readahead state cannot be allocated.
+ * The last-page computation assumes a non-zero inode size; verify at the
+ * call site.
+ */
+static int readahead_cache(struct inode *inode)
+{
+ struct file_ra_state *ra;
+ unsigned long last_index;
+
+ ra = kzalloc(sizeof(*ra), GFP_NOFS);
+ if (!ra)
+ return -ENOMEM;
+
+ file_ra_state_init(ra, inode->i_mapping);
+ last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+ page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
+
+ kfree(ra);
+
+ return 0;
+}
+
+/*
+ * Populate the in-memory free space entries of @block_group from its
+ * on-disk free space cache.
+ *
+ * Page 0 of the cache inode starts with one u32 crc per page followed by
+ * a u64 generation; the entries follow. Pages holding entries come first,
+ * then one page of bitmap data per bitmap entry, in the order the bitmap
+ * entries were read.
+ *
+ * Returns 1 when the cache was read and linked in. Returns 0 when there
+ * is no usable cache: the filesystem is closing, the cache state is not
+ * BTRFS_DC_WRITTEN, the inode or header item is missing, a generation or
+ * crc does not match, or an allocation fails. When the cache contents
+ * turn out to be bad, the block group is marked BTRFS_DC_CLEAR and
+ * btrfs_remove_free_space_cache() is called on it.
+ */
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group)
+{
+ struct btrfs_root *root = fs_info->tree_root;
+ struct inode *inode;
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct page *page;
+ struct btrfs_path *path;
+ u32 *checksums = NULL, *crc;
+ char *disk_crcs = NULL;
+ struct btrfs_key key;
+ struct list_head bitmaps;
+ u64 num_entries;
+ u64 num_bitmaps;
+ u64 generation;
+ u32 cur_crc = ~(u32)0;
+ pgoff_t index = 0;
+ unsigned long first_page_offset;
+ int num_checksums;
+ int ret = 0;
+
+ /*
+ * If we're unmounting then just return, since this does a search on the
+ * normal root and not the commit root and we could deadlock.
+ */
+ smp_mb();
+ if (fs_info->closing)
+ return 0;
+
+ /*
+ * If this block group has been marked to be cleared for one reason or
+ * another then we can't trust the on disk cache, so just return.
+ */
+ spin_lock(&block_group->lock);
+ if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ spin_unlock(&block_group->lock);
+
+ INIT_LIST_HEAD(&bitmaps);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return 0;
+
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode)) {
+ btrfs_free_path(path);
+ return 0;
+ }
+
+ /* Nothing in the space cache, goodbye */
+ if (!i_size_read(inode)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret) {
+ /*
+ * A positive return means the header item was not found. It must
+ * not be passed on, because a return of 1 from this function means
+ * "cache loaded"; report "no usable cache" like the other failure
+ * paths do.
+ */
+ ret = 0;
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ num_entries = btrfs_free_space_entries(leaf, header);
+ num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+ generation = btrfs_free_space_generation(leaf, header);
+ btrfs_free_path(path);
+
+ if (BTRFS_I(inode)->generation != generation) {
+ printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
+ " not match free space cache generation (%llu) for "
+ "block group %llu\n",
+ (unsigned long long)BTRFS_I(inode)->generation,
+ (unsigned long long)generation,
+ (unsigned long long)block_group->key.objectid);
+ goto out;
+ }
+
+ if (!num_entries)
+ goto out;
+
+ /* Setup everything for doing checksumming */
+ num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+ checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+ if (!checksums)
+ goto out;
+ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+ disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
+ if (!disk_crcs)
+ goto out;
+
+ ret = readahead_cache(inode);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ while (1) {
+ struct btrfs_free_space_entry *entry;
+ struct btrfs_free_space *e;
+ void *addr;
+ unsigned long offset = 0;
+ unsigned long start_offset = 0;
+ int need_loop = 0;
+
+ if (!num_entries && !num_bitmaps)
+ break;
+
+ /* entries on the first page start after the crcs and generation */
+ if (index == 0) {
+ start_offset = first_page_offset;
+ offset = start_offset;
+ }
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ ret = 0;
+ goto free_cache;
+ }
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ printk(KERN_ERR "btrfs: error reading free "
+ "space cache: %llu\n",
+ (unsigned long long)
+ block_group->key.objectid);
+ goto free_cache;
+ }
+ }
+ addr = kmap(page);
+
+ if (index == 0) {
+ u64 *gen;
+
+ /* keep a private copy of the crc table before walking pages */
+ memcpy(disk_crcs, addr, first_page_offset);
+ gen = addr + (sizeof(u32) * num_checksums);
+ if (*gen != BTRFS_I(inode)->generation) {
+ printk(KERN_ERR "btrfs: space cache generation"
+ " (%llu) does not match inode (%llu) "
+ "for block group %llu\n",
+ (unsigned long long)*gen,
+ (unsigned long long)
+ BTRFS_I(inode)->generation,
+ (unsigned long long)
+ block_group->key.objectid);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ crc = (u32 *)disk_crcs;
+ }
+ entry = addr + start_offset;
+
+ /* First let's check our crc before we do anything fun */
+ cur_crc = ~(u32)0;
+ cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
+ PAGE_CACHE_SIZE - start_offset);
+ btrfs_csum_final(cur_crc, (char *)&cur_crc);
+ if (cur_crc != *crc) {
+ printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
+ "block group %llu\n", index,
+ (unsigned long long)block_group->key.objectid);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ crc++;
+
+ while (1) {
+ if (!num_entries)
+ break;
+
+ need_loop = 1;
+ e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+ if (!e) {
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+
+ e->offset = le64_to_cpu(entry->offset);
+ e->bytes = le64_to_cpu(entry->bytes);
+ if (!e->bytes) {
+ kunmap(page);
+ kfree(e);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+
+ if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
+ spin_lock(&block_group->tree_lock);
+ ret = link_free_space(block_group, e);
+ spin_unlock(&block_group->tree_lock);
+ BUG_ON(ret);
+ } else {
+ e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ if (!e->bitmap) {
+ kunmap(page);
+ kfree(e);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+ spin_lock(&block_group->tree_lock);
+ ret = link_free_space(block_group, e);
+ block_group->total_bitmaps++;
+ recalculate_thresholds(block_group);
+ spin_unlock(&block_group->tree_lock);
+ /* the bitmap contents are filled in from a later page */
+ list_add_tail(&e->list, &bitmaps);
+ }
+
+ num_entries--;
+ offset += sizeof(struct btrfs_free_space_entry);
+ if (offset + sizeof(struct btrfs_free_space_entry) >=
+ PAGE_CACHE_SIZE)
+ break;
+ entry++;
+ }
+
+ /*
+ * We read an entry out of this page, we need to move on to the
+ * next page.
+ */
+ if (need_loop) {
+ kunmap(page);
+ goto next;
+ }
+
+ /*
+ * The header claims another bitmap page, but no bitmap entry is
+ * waiting for one. Taking list_entry() of the empty list head would
+ * copy a page over unrelated memory, so treat the cache as bogus.
+ */
+ if (list_empty(&bitmaps)) {
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ }
+
+ /*
+ * We add the bitmaps at the end of the entries in order that
+ * the bitmap entries are added to the cache.
+ */
+ e = list_entry(bitmaps.next, struct btrfs_free_space, list);
+ list_del_init(&e->list);
+ memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
+ kunmap(page);
+ num_bitmaps--;
+next:
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ }
+
+ ret = 1;
+out:
+ kfree(checksums);
+ kfree(disk_crcs);
+ iput(inode);
+ return ret;
+
+free_cache:
+ /* This cache is bogus, make sure it gets cleared */
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_CLEAR;
+ spin_unlock(&block_group->lock);
+ btrfs_remove_free_space_cache(block_group);
+ /* never hand back a stale value left over from link_free_space() */
+ ret = 0;
+ goto out;
+}
+
+/*
+ * Write the in-memory free space entries of @block_group into its free
+ * space cache inode and update the free space header item.
+ *
+ * All tree operations are done against root->fs_info->tree_root,
+ * whatever @root was passed in.
+ *
+ * Page 0 begins with one u32 crc per page followed by a u64 generation
+ * (set to trans->transid); the entries start after that. Extent and
+ * bitmap entries are written first, the bitmap payloads go into the pages
+ * that follow, and any pages left up to the inode's last page are zeroed.
+ *
+ * Returns 1 when the cache pages and the header item were written.
+ * Returns 0 otherwise. The early exits (cache state below BTRFS_DC_SETUP,
+ * inode lookup failure, empty inode, crc allocation failure) return
+ * without touching the block group. Every path that reaches out_free
+ * with ret == 0 invalidates the inode's pages, marks the block group
+ * BTRFS_DC_ERROR and resets the inode generation to 0.
+ */
+int btrfs_write_out_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct inode *inode;
+ struct rb_node *node;
+ struct list_head *pos, *n;
+ struct page *page;
+ struct extent_state *cached_state = NULL;
+ struct list_head bitmap_list;
+ struct btrfs_key key;
+ u64 bytes = 0;
+ u32 *crc, *checksums;
+ pgoff_t index = 0, last_index = 0;
+ unsigned long first_page_offset;
+ int num_checksums;
+ int entries = 0;
+ int bitmaps = 0;
+ int ret = 0;
+
+ root = root->fs_info->tree_root;
+
+ INIT_LIST_HEAD(&bitmap_list);
+
+ spin_lock(&block_group->lock);
+ if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ spin_unlock(&block_group->lock);
+
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode))
+ return 0;
+
+ if (!i_size_read(inode)) {
+ iput(inode);
+ return 0;
+ }
+
+ last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ filemap_write_and_wait(inode->i_mapping);
+ btrfs_wait_ordered_range(inode, inode->i_size &
+ ~(root->sectorsize - 1), (u64)-1);
+
+ /* We need a checksum per page. */
+ num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+ crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+ if (!crc) {
+ iput(inode);
+ return 0;
+ }
+
+ /* Since the first page has all of our checksums and our generation we
+ * need to calculate the offset into the page that we can start writing
+ * our entries.
+ */
+ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+
+ /* no free space entries: leaves through out_free with ret == 0 */
+ node = rb_first(&block_group->free_space_offset);
+ if (!node)
+ goto out_free;
+
+ /*
+ * Lock all pages first so we can lock the extent safely.
+ *
+ * NOTE: Because we hold the ref the entire time we're going to write to
+ * the page find_get_page should never fail, so we don't do a check
+ * after find_get_page at this point. Just putting this here so people
+ * know and don't freak out.
+ */
+ while (index <= last_index) {
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ pgoff_t i = 0;
+
+ /*
+ * Undo the pages grabbed so far: one release for the
+ * find_get_page() below, one for the earlier
+ * grab_cache_page().
+ */
+ while (i < index) {
+ page = find_get_page(inode->i_mapping, i);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ i++;
+ }
+ goto out_free;
+ }
+ index++;
+ }
+
+ index = 0;
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+ 0, &cached_state, GFP_NOFS);
+
+ /* Write out the extent entries */
+ do {
+ struct btrfs_free_space_entry *entry;
+ void *addr;
+ unsigned long offset = 0;
+ unsigned long start_offset = 0;
+
+ if (index == 0) {
+ start_offset = first_page_offset;
+ offset = start_offset;
+ }
+
+ /*
+ * NOTE(review): this loop is bounded only by the number of
+ * entries, not by last_index, and the page returned here is used
+ * unchecked - confirm the inode is always sized to hold every
+ * entry.
+ */
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ entry = addr + start_offset;
+
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ while (1) {
+ struct btrfs_free_space *e;
+
+ e = rb_entry(node, struct btrfs_free_space, offset_index);
+ entries++;
+
+ entry->offset = cpu_to_le64(e->offset);
+ entry->bytes = cpu_to_le64(e->bytes);
+ if (e->bitmap) {
+ entry->type = BTRFS_FREE_SPACE_BITMAP;
+ /* payload is written after all entry pages */
+ list_add_tail(&e->list, &bitmap_list);
+ bitmaps++;
+ } else {
+ entry->type = BTRFS_FREE_SPACE_EXTENT;
+ }
+ node = rb_next(node);
+ if (!node)
+ break;
+ offset += sizeof(struct btrfs_free_space_entry);
+ if (offset + sizeof(struct btrfs_free_space_entry) >=
+ PAGE_CACHE_SIZE)
+ break;
+ entry++;
+ }
+ /* crc covers the page from start_offset to its end */
+ *crc = ~(u32)0;
+ *crc = btrfs_csum_data(root, addr + start_offset, *crc,
+ PAGE_CACHE_SIZE - start_offset);
+ kunmap(page);
+
+ btrfs_csum_final(*crc, (char *)crc);
+ crc++;
+
+ bytes += PAGE_CACHE_SIZE;
+
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ /*
+ * We need to release our reference we got for grab_cache_page,
+ * except for the first page which will hold our checksums, we
+ * do that below.
+ */
+ if (index != 0) {
+ unlock_page(page);
+ page_cache_release(page);
+ }
+
+ /* drops the reference taken by find_get_page() above */
+ page_cache_release(page);
+
+ index++;
+ } while (node);
+
+ /* Write out the bitmaps */
+ list_for_each_safe(pos, n, &bitmap_list) {
+ void *addr;
+ struct btrfs_free_space *entry =
+ list_entry(pos, struct btrfs_free_space, list);
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
+ *crc = ~(u32)0;
+ *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
+ kunmap(page);
+ btrfs_csum_final(*crc, (char *)crc);
+ crc++;
+ bytes += PAGE_CACHE_SIZE;
+
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ list_del_init(&entry->list);
+ index++;
+ }
+
+ /* Zero out the rest of the pages just to make sure */
+ while (index <= last_index) {
+ void *addr;
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ kunmap(page);
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ bytes += PAGE_CACHE_SIZE;
+ index++;
+ }
+
+ btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
+
+ /* Write the checksums and trans id to the first page */
+ {
+ void *addr;
+ u64 *gen;
+
+ page = find_get_page(inode->i_mapping, 0);
+
+ addr = kmap(page);
+ memcpy(addr, checksums, sizeof(u32) * num_checksums);
+ gen = addr + (sizeof(u32) * num_checksums);
+ /*
+ * NOTE(review): stored without an endian conversion, unlike
+ * the entry fields above.
+ */
+ *gen = trans->transid;
+ kunmap(page);
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ }
+ BTRFS_I(inode)->generation = trans->transid;
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+ i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+
+ filemap_write_and_wait(inode->i_mapping);
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0) {
+ ret = 0;
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+ goto out_free;
+ }
+ leaf = path->nodes[0];
+ if (ret > 0) {
+ struct btrfs_key found_key;
+ /* no exact match: check whether the previous slot is our header */
+ BUG_ON(!path->slots[0]);
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+ found_key.offset != block_group->key.objectid) {
+ ret = 0;
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL,
+ GFP_NOFS);
+ btrfs_release_path(root, path);
+ goto out_free;
+ }
+ }
+ /* record the entry and bitmap counts and the generation */
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ btrfs_set_free_space_entries(leaf, header, entries);
+ btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+ btrfs_set_free_space_generation(leaf, header, trans->transid);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ ret = 1;
+
+out_free:
+ /* ret == 0: drop the cached pages and flag the on-disk cache as bad */
+ if (ret == 0) {
+ invalidate_inode_pages2_range(inode->i_mapping, 0, index);
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_ERROR;
+ spin_unlock(&block_group->lock);
+ BTRFS_I(inode)->generation = 0;
+ }
+ kfree(checksums);
+ btrfs_update_inode(trans, root, inode);
+ iput(inode);
+ return ret;
+}
+
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
u64 offset)
{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e79011b..e49ca5c321b5 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
struct list_head list;
};
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+ struct btrfs_block_group_cache
+ *block_group, struct btrfs_path *path);
+int create_free_space_inode(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct inode *inode);
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_group_cache *block_group);
+int btrfs_write_out_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path);
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 64f99cf69ce0..558cac2dfa54 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
u64 num_bytes;
- u64 orig_start;
- u64 disk_num_bytes;
u64 blocksize = root->sectorsize;
u64 actual_end;
u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
int i;
int will_compress;
- orig_start = start;
-
actual_end = min_t(u64, isize, end + 1);
again:
will_compress = 0;
@@ -371,7 +367,6 @@ again:
total_compressed = min(total_compressed, max_uncompressed);
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
total_in = 0;
ret = 0;
@@ -467,7 +462,6 @@ again:
if (total_compressed >= total_in) {
will_compress = 0;
} else {
- disk_num_bytes = total_compressed;
num_bytes = total_in;
}
}
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
u64 disk_num_bytes;
u64 cur_alloc_size;
u64 blocksize = root->sectorsize;
- u64 actual_end;
- u64 isize = i_size_read(inode);
struct btrfs_key ins;
struct extent_map *em;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
+ BUG_ON(root == root->fs_info->tree_root);
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- actual_end = min_t(u64, isize, end + 1);
-
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
int type;
int nocow;
int check_prev = 1;
+ bool nolock = false;
path = btrfs_alloc_path();
BUG_ON(!path);
- trans = btrfs_join_transaction(root, 1);
+ if (root == root->fs_info->tree_root) {
+ nolock = true;
+ trans = btrfs_join_transaction_nolock(root, 1);
+ } else {
+ trans = btrfs_join_transaction(root, 1);
+ }
BUG_ON(!trans);
cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
BUG_ON(ret);
}
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
+ if (nolock) {
+ ret = btrfs_end_transaction_nolock(trans, root);
+ BUG_ON(ret);
+ } else {
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ }
btrfs_free_path(path);
return 0;
}
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ int do_list = (root->root_key.objectid !=
+ BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
root->fs_info->delalloc_bytes += len;
- if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
}
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ int do_list = (root->root_key.objectid !=
+ BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
- if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+ && do_list)
btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->delalloc_bytes -= len;
BTRFS_I(inode)->delalloc_bytes -= len;
- if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+ if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
}
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
if (map_length < length + size)
return 1;
- return 0;
+ return ret;
}
/*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ if (root == root->fs_info->tree_root)
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
+ else
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct extent_state *cached_state = NULL;
int compressed = 0;
int ret;
+ bool nolock = false;
ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0;
BUG_ON(!ordered_extent);
+ nolock = (root == root->fs_info->tree_root);
+
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state, GFP_NOFS);
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->len);
BUG_ON(ret);
} else {
+ BUG_ON(root == root->fs_info->tree_root);
ret = insert_reserved_file_extent(trans, inode,
ordered_extent->file_offset,
ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
- btrfs_delalloc_release_metadata(inode, ordered_extent->len);
- if (trans)
- btrfs_end_transaction(trans, root);
+ if (nolock) {
+ if (trans)
+ btrfs_end_transaction_nolock(trans, root);
+ } else {
+ btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+ if (trans)
+ btrfs_end_transaction(trans, root);
+ }
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
- struct btrfs_item *item;
struct btrfs_key key, found_key;
struct btrfs_trans_handle *trans;
struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
/* pull out the item */
leaf = path->nodes[0];
- item = btrfs_item_nr(leaf, path->slots[0]);
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
dir, index);
- BUG_ON(ret);
+ if (ret == -ENOENT)
+ ret = 0;
err:
btrfs_free_path(path);
if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
{
struct extent_buffer *eb;
int level;
- int ret;
u64 refs = 1;
+ int uninitialized_var(ret);
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
if (refs > 1)
return 1;
}
- return 0;
+ return ret; /* XXX callers? */
}
/*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
- if (root->ref_cows)
+ if (root->ref_cows || root == root->fs_info->tree_root)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
} else {
break;
}
- if (found_extent && root->ref_cows) {
+ if (found_extent && (root->ref_cows ||
+ root == root->fs_info->tree_root)) {
btrfs_set_path_blocking(path);
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
int ret;
truncate_inode_pages(&inode->i_data, 0);
- if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+ if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
+ root == root->fs_info->tree_root))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
}
spin_unlock(&root->inode_lock);
- if (empty && btrfs_root_refs(&root->root_item) == 0) {
+ /*
+ * Free space cache has inodes in the tree root, but the tree root has a
+ * root_refs of 0, so this could end up dropping the tree root as a
+ * snapshot, so we need the extra !root->fs_info->tree_root check to
+ * make sure we don't drop it.
+ */
+ if (empty && btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root) {
synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret = 0;
+ bool nolock = false;
if (BTRFS_I(inode)->dummy_inode)
return 0;
+ smp_mb();
+ nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
+
if (wbc->sync_mode == WB_SYNC_ALL) {
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
- ret = btrfs_commit_transaction(trans, root);
+ if (nolock)
+ ret = btrfs_end_transaction_nolock(trans, root);
+ else
+ ret = btrfs_commit_transaction(trans, root);
}
return ret;
}
@@ -5645,7 +5690,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_dio_private *dip;
struct bio_vec *bvec = bio->bi_io_vec;
- u64 start;
int skip_sum;
int write = rw & REQ_WRITE;
int ret = 0;
@@ -5671,7 +5715,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
dip->inode = inode;
dip->logical_offset = file_offset;
- start = dip->logical_offset;
dip->bytes = 0;
do {
dip->bytes += bvec->bv_len;
@@ -6308,6 +6351,21 @@ void btrfs_destroy_inode(struct inode *inode)
spin_unlock(&root->fs_info->ordered_extent_lock);
}
+ if (root == root->fs_info->tree_root) {
+ struct btrfs_block_group_cache *block_group;
+
+ block_group = btrfs_lookup_block_group(root->fs_info,
+ BTRFS_I(inode)->block_group);
+ if (block_group && block_group->inode == inode) {
+ spin_lock(&block_group->lock);
+ block_group->inode = NULL;
+ spin_unlock(&block_group->lock);
+ btrfs_put_block_group(block_group);
+ } else if (block_group) {
+ btrfs_put_block_group(block_group);
+ }
+ }
+
spin_lock(&root->orphan_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6340,7 +6398,8 @@ int btrfs_drop_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- if (btrfs_root_refs(&root->root_item) == 0)
+ if (btrfs_root_refs(&root->root_item) == 0 &&
+ root != root->fs_info->tree_root)
return 1;
else
return generic_drop_inode(inode);
@@ -6609,7 +6668,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return 0;
}
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+ int sync)
{
struct btrfs_inode *binode;
struct inode *inode = NULL;
@@ -6631,7 +6691,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
- write_inode_now(inode, 0);
+ if (sync) {
+ filemap_write_and_wait(inode->i_mapping);
+ /*
+ * We have to do this because compression doesn't
+ * actually set PG_writeback until it submits the pages
+ * for IO, which happens in an async thread, so we could
+ * race and not actually wait for any writeback pages
+ * because they've not been submitted yet. Technically
+ * this could still be the case for the ordered stuff
+ * since the async thread may not have started to do its
+ * work yet. If this becomes the case then we need to
+ * figure out a way to make sure that in writepage we
+ * wait for any async pages to be submitted before
+ * returning so that fdatawait does what it's supposed to
+ * do.
+ */
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ } else {
+ filemap_flush(inode->i_mapping);
+ }
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
@@ -6757,27 +6836,33 @@ out_unlock:
return err;
}
-int btrfs_prealloc_file_range(struct inode *inode, int mode,
- u64 start, u64 num_bytes, u64 min_size,
- loff_t actual_len, u64 *alloc_hint)
+static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
+ u64 start, u64 num_bytes, u64 min_size,
+ loff_t actual_len, u64 *alloc_hint,
+ struct btrfs_trans_handle *trans)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 cur_offset = start;
int ret = 0;
+ bool own_trans = true;
+ if (trans)
+ own_trans = false;
while (num_bytes > 0) {
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
+ if (own_trans) {
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
}
ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
0, *alloc_hint, (u64)-1, &ins, 1);
if (ret) {
- btrfs_end_transaction(trans, root);
+ if (own_trans)
+ btrfs_end_transaction(trans, root);
break;
}
@@ -6810,11 +6895,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
+ if (own_trans)
+ btrfs_end_transaction(trans, root);
}
return ret;
}
+/*
+ * Preallocate @num_bytes of file range starting at @start.
+ *
+ * No transaction handle is supplied to the worker, so it starts and ends
+ * one of its own for every extent it reserves.
+ */
+int btrfs_prealloc_file_range(struct inode *inode, int mode,
+			      u64 start, u64 num_bytes, u64 min_size,
+			      loff_t actual_len, u64 *alloc_hint)
+{
+	struct btrfs_trans_handle *no_trans = NULL;
+
+	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+					   min_size, actual_len, alloc_hint,
+					   no_trans);
+}
+
+/*
+ * Same as btrfs_prealloc_file_range(), but does the work under the
+ * transaction handle @trans passed in by the caller. When @trans is
+ * non-NULL the worker neither starts nor ends a transaction itself.
+ */
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+				    struct btrfs_trans_handle *trans, int mode,
+				    u64 start, u64 num_bytes, u64 min_size,
+				    loff_t actual_len, u64 *alloc_hint)
+{
+	int ret;
+
+	ret = __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+					  min_size, actual_len, alloc_hint,
+					  trans);
+	return ret;
+}
+
static long btrfs_fallocate(struct inode *inode, int mode,
loff_t offset, loff_t len)
{
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9254b3d58dbe..463d91b4dd3a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
- char *name, int namelen)
+ char *name, int namelen,
+ u64 *async_transid)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
@@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
- err = btrfs_commit_transaction(trans, root);
+ if (async_transid) {
+ *async_transid = trans->transid;
+ err = btrfs_commit_transaction_async(trans, root, 1);
+ } else {
+ err = btrfs_commit_transaction(trans, root);
+ }
if (err && !ret)
ret = err;
return ret;
}
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+ char *name, int namelen, u64 *async_transid)
{
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
@@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
- ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
+ if (async_transid) {
+ *async_transid = trans->transid;
+ ret = btrfs_commit_transaction_async(trans,
+ root->fs_info->extent_root, 1);
+ } else {
+ ret = btrfs_commit_transaction(trans,
+ root->fs_info->extent_root);
+ }
BUG_ON(ret);
ret = pending_snapshot->error;
@@ -395,6 +409,76 @@ fail:
return ret;
}
+/*
+ * Local copy of check_sticky() from fs/namei.c.
+ * It's inline, so the penalty for filesystems that don't use the sticky
+ * bit is minimal.
+ *
+ * Returns 0 when the sticky bit on @dir does not stand in the way of the
+ * caller touching @inode, non-zero when it does.
+ */
+static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
+{
+	uid_t caller = current_fsuid();
+	int unrestricted = !(dir->i_mode & S_ISVTX) ||
+			   inode->i_uid == caller ||
+			   dir->i_uid == caller;
+
+	if (unrestricted)
+		return 0;
+
+	/* sticky dir and the caller owns neither side: needs CAP_FOWNER */
+	return !capable(CAP_FOWNER);
+}
+
+/* copy of may_delete in fs/namei.c()
+ * Check whether we can remove a link victim from directory dir, check
+ * whether the type of victim is right.
+ * 1. We can't do it if dir is read-only (done in permission())
+ * 2. We should have write and exec permissions on dir
+ * 3. We can't remove anything from append-only dir
+ * 4. We can't do anything with immutable dir (done in permission())
+ * 5. If the sticky bit on dir is set we should either
+ * a. be owner of dir, or
+ * b. be owner of victim, or
+ * c. have CAP_FOWNER capability
+ * 6. If the victim is append-only or immutable we can't do anything with
+ * links pointing to it.
+ * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ * 9. We can't remove a root or mountpoint.
+ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ * nfs_async_unlink().
+ *
+ * isdir is non-zero when the caller wants to remove a directory.
+ * Returns 0 when the removal is allowed, a negative errno otherwise. The
+ * checks run in the order below, so the first failing one picks the errno.
+ */
+
+static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
+{
+ int error;
+
+ /* a dentry without an inode has nothing to remove */
+ if (!victim->d_inode)
+ return -ENOENT;
+
+ /* dir must really be the parent of victim */
+ BUG_ON(victim->d_parent->d_inode != dir);
+ audit_inode_child(victim, dir);
+
+ /* rule 2: write and exec permission on the parent directory */
+ error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+ if (error)
+ return error;
+ /* rule 3: append-only parent */
+ if (IS_APPEND(dir))
+ return -EPERM;
+ /* rules 5 and 6; a swap file is refused here as well */
+ if (btrfs_check_sticky(dir, victim->d_inode)||
+ IS_APPEND(victim->d_inode)||
+ IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+ return -EPERM;
+ /* rules 7, 8 and 9: the victim's type must match what was asked for */
+ if (isdir) {
+ if (!S_ISDIR(victim->d_inode->i_mode))
+ return -ENOTDIR;
+ if (IS_ROOT(victim))
+ return -EBUSY;
+ } else if (S_ISDIR(victim->d_inode->i_mode))
+ return -EISDIR;
+ /* the parent itself is already marked dead */
+ if (IS_DEADDIR(dir))
+ return -ENOENT;
+ /* rule 10: NFS sillyrenamed file */
+ if (victim->d_flags & DCACHE_NFSFS_RENAMED)
+ return -EBUSY;
+ return 0;
+}
+
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
@@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
*/
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
- struct btrfs_root *snap_src)
+ struct btrfs_root *snap_src,
+ u64 *async_transid)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_up_read;
if (snap_src) {
- error = create_snapshot(snap_src, dentry);
+ error = create_snapshot(snap_src, dentry,
+ name, namelen, async_transid);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
- name, namelen);
+ name, namelen, async_transid);
}
if (!error)
fsnotify_mkdir(dir, dentry);
@@ -708,7 +794,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
char *sizestr;
char *devstr = NULL;
int ret = 0;
- int namelen;
int mod = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +807,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- namelen = strlen(vol_args->name);
mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
@@ -801,11 +885,13 @@ out_unlock:
return ret;
}
-static noinline int btrfs_ioctl_snap_create(struct file *file,
- void __user *arg, int subvol)
+static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
+ char *name,
+ unsigned long fd,
+ int subvol,
+ u64 *transid)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
- struct btrfs_ioctl_vol_args *vol_args;
struct file *src_file;
int namelen;
int ret = 0;
@@ -813,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
-
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- namelen = strlen(vol_args->name);
- if (strchr(vol_args->name, '/')) {
+ namelen = strlen(name);
+ if (strchr(name, '/')) {
ret = -EINVAL;
goto out;
}
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
- NULL);
+ ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ NULL, transid);
} else {
struct inode *src_inode;
- src_file = fget(vol_args->fd);
+ src_file = fget(fd);
if (!src_file) {
ret = -EINVAL;
goto out;
@@ -843,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
fput(src_file);
goto out;
}
- ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
- BTRFS_I(src_inode)->root);
+ ret = btrfs_mksubvol(&file->f_path, name, namelen,
+ BTRFS_I(src_inode)->root,
+ transid);
fput(src_file);
}
out:
+ return ret;
+}
+
+/*
+ * Common handler for BTRFS_IOC_SNAP_CREATE, BTRFS_IOC_SUBVOL_CREATE and
+ * BTRFS_IOC_SNAP_CREATE_ASYNC.
+ *
+ * @arg:    user pointer to a struct btrfs_ioctl_vol_args, or to a
+ *          struct btrfs_ioctl_async_vol_args when @async is set
+ * @subvol: non-zero to create a new subvolume, zero to snapshot the root
+ *          behind the passed file descriptor
+ * @async:  non-zero to commit asynchronously and report the transaction id
+ *          back through the transid field of @arg
+ *
+ * Returns 0 on success or a negative errno. The copied-in argument buffer
+ * is released on every path past the memdup_user() calls.
+ */
+static noinline int btrfs_ioctl_snap_create(struct file *file,
+					    void __user *arg, int subvol,
+					    int async)
+{
+	struct btrfs_ioctl_vol_args *vol_args = NULL;
+	struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
+	char *name;
+	u64 fd;
+	u64 transid = 0;
+	int ret;
+
+	if (async) {
+		async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
+		if (IS_ERR(async_vol_args))
+			return PTR_ERR(async_vol_args);
+
+		name = async_vol_args->name;
+		fd = async_vol_args->fd;
+		/* never trust userspace to have terminated the name */
+		async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
+	} else {
+		vol_args = memdup_user(arg, sizeof(*vol_args));
+		if (IS_ERR(vol_args))
+			return PTR_ERR(vol_args);
+		name = vol_args->name;
+		fd = vol_args->fd;
+		vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+	}
+
+	/*
+	 * Only the async ioctl may hand out &transid: create_subvol() and
+	 * create_snapshot() take a non-NULL pointer as the request to commit
+	 * asynchronously, so the synchronous ioctls have to pass NULL.
+	 */
+	ret = btrfs_ioctl_snap_create_transid(file, name, fd, subvol,
+					      async ? &transid : NULL);
+
+	if (!ret && async) {
+		/* record the failure but still fall through to the kfree()s */
+		if (copy_to_user(arg +
+				offsetof(struct btrfs_ioctl_async_vol_args,
+				transid), &transid, sizeof(transid)))
+			ret = -EFAULT;
+	}
+
 	kfree(vol_args);
+	kfree(async_vol_args);
+
 	return ret;
 }
@@ -1073,14 +1198,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- args = kmalloc(sizeof(*args), GFP_KERNEL);
- if (!args)
- return -ENOMEM;
+ args = memdup_user(argp, sizeof(*args));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
- if (copy_from_user(args, argp, sizeof(*args))) {
- kfree(args);
- return -EFAULT;
- }
inode = fdentry(file)->d_inode;
ret = search_ioctl(inode, args);
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1309,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- args = kmalloc(sizeof(*args), GFP_KERNEL);
- if (!args)
- return -ENOMEM;
+ args = memdup_user(argp, sizeof(*args));
+ if (IS_ERR(args))
+ return PTR_ERR(args);
- if (copy_from_user(args, argp, sizeof(*args))) {
- kfree(args);
- return -EFAULT;
- }
inode = fdentry(file)->d_inode;
if (args->treeid == 0)
@@ -1227,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int ret;
int err = 0;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -1259,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
}
inode = dentry->d_inode;
+ dest = BTRFS_I(inode)->root;
+ if (!capable(CAP_SYS_ADMIN)){
+ /*
+ * Regular user. Only allow this with a special mount
+ * option, when the user has write+exec access to the
+ * subvol root, and when rmdir(2) would have been
+ * allowed.
+ *
+ * Note that this is _not_ a check that the subvol is
+ * empty or doesn't contain data that we wouldn't
+ * otherwise be able to delete.
+ *
+ * Users who want to delete empty subvols should try
+ * rmdir(2).
+ */
+ err = -EPERM;
+ if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+ goto out_dput;
+
+ /*
+ * Do not allow deletion if the parent dir is the same
+ * as the dir to be deleted. That means the ioctl
+ * must be called on the dentry referencing the root
+ * of the subvol, not a random directory contained
+ * within it.
+ */
+ err = -EINVAL;
+ if (root == dest)
+ goto out_dput;
+
+ err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+ if (err)
+ goto out_dput;
+
+ /* check if subvolume may be deleted by a non-root user */
+ err = btrfs_may_delete(dir, dentry, 1);
+ if (err)
+ goto out_dput;
+ }
+
if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out_dput;
}
- dest = BTRFS_I(inode)->root;
-
mutex_lock(&inode->i_mutex);
err = d_invalidate(dentry);
if (err)
@@ -1304,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
BUG_ON(ret);
}
- ret = btrfs_commit_transaction(trans, root);
+ ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
inode->i_flags |= S_DEAD;
out_up_write:
@@ -1502,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
path->reada = 2;
if (inode < src) {
- mutex_lock(&inode->i_mutex);
- mutex_lock(&src->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
} else {
- mutex_lock(&src->i_mutex);
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
}
/* determine range to clone */
@@ -1530,13 +1682,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
- ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
- if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
+ ordered = btrfs_lookup_first_ordered_extent(src, off+len);
+ if (!ordered &&
+ !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
+ EXTENT_DELALLOC, 0, NULL))
break;
unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
if (ordered)
btrfs_put_ordered_extent(ordered);
- btrfs_wait_ordered_range(src, off, off+len);
+ btrfs_wait_ordered_range(src, off, len);
}
/* clone data */
@@ -1605,7 +1759,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
}
btrfs_release_path(root, path);
- if (key.offset + datal < off ||
+ if (key.offset + datal <= off ||
key.offset >= off+len)
goto next;
@@ -1879,6 +2033,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
return 0;
}
+/*
+ * Summarise every block group on @groups_list into @space: total_bytes and
+ * used_bytes are summed over the list, flags is taken from the last group
+ * visited. An empty list leaves all three fields at zero.
+ */
+static void get_block_group_info(struct list_head *groups_list,
+				 struct btrfs_ioctl_space_info *space)
+{
+	struct btrfs_block_group_cache *bg;
+
+	space->flags = 0;
+	space->used_bytes = 0;
+	space->total_bytes = 0;
+
+	list_for_each_entry(bg, groups_list, list) {
+		/* key.offset is what gets counted as the group's size */
+		space->total_bytes += bg->key.offset;
+		space->used_bytes += btrfs_block_group_used(&bg->item);
+		space->flags = bg->flags;
+	}
+}
+
long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2057,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
struct btrfs_ioctl_space_info *dest_orig;
struct btrfs_ioctl_space_info *user_dest;
struct btrfs_space_info *info;
+ u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
+ int num_types = 4;
int alloc_size;
int ret = 0;
int slot_count = 0;
+ int i, c;
if (copy_from_user(&space_args,
(struct btrfs_ioctl_space_args __user *)arg,
sizeof(space_args)))
return -EFAULT;
- /* first we count slots */
- rcu_read_lock();
- list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
- slot_count++;
- rcu_read_unlock();
+ for (i = 0; i < num_types; i++) {
+ struct btrfs_space_info *tmp;
+
+ info = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+ list) {
+ if (tmp->flags == types[i]) {
+ info = tmp;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!info)
+ continue;
+
+ down_read(&info->groups_sem);
+ for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+ if (!list_empty(&info->block_groups[c]))
+ slot_count++;
+ }
+ up_read(&info->groups_sem);
+ }
/* space_slots == 0 means they are asking for a count */
if (space_args.space_slots == 0) {
space_args.total_spaces = slot_count;
goto out;
}
+
+ slot_count = min_t(int, space_args.space_slots, slot_count);
+
alloc_size = sizeof(*dest) * slot_count;
+
/* we generally have at most 6 or so space infos, one for each raid
* level. So, a whole page should be more than enough for everyone
*/
@@ -1921,27 +2120,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
dest_orig = dest;
/* now we have a buffer to copy into */
- rcu_read_lock();
- list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
- /* make sure we don't copy more than we allocated
- * in our buffer
- */
- if (slot_count == 0)
- break;
- slot_count--;
-
- /* make sure userland has enough room in their buffer */
- if (space_args.total_spaces >= space_args.space_slots)
- break;
+ for (i = 0; i < num_types; i++) {
+ struct btrfs_space_info *tmp;
+
+ info = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+ list) {
+ if (tmp->flags == types[i]) {
+ info = tmp;
+ break;
+ }
+ }
+ rcu_read_unlock();
- space.flags = info->flags;
- space.total_bytes = info->total_bytes;
- space.used_bytes = info->bytes_used;
- memcpy(dest, &space, sizeof(space));
- dest++;
- space_args.total_spaces++;
+ if (!info)
+ continue;
+ down_read(&info->groups_sem);
+ for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+ if (!list_empty(&info->block_groups[c])) {
+ get_block_group_info(&info->block_groups[c],
+ &space);
+ memcpy(dest, &space, sizeof(space));
+ dest++;
+ space_args.total_spaces++;
+ }
+ }
+ up_read(&info->groups_sem);
}
- rcu_read_unlock();
user_dest = (struct btrfs_ioctl_space_info *)
(arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
return 0;
}
+/*
+ * BTRFS_IOC_START_SYNC: kick off an asynchronous commit of the current
+ * transaction and, when @argp is non-NULL, report its transaction id to
+ * userspace.
+ *
+ * Returns 0 on success, the error from starting or committing the
+ * transaction, or -EFAULT if the id cannot be copied out.
+ */
+static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
+{
+	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+	struct btrfs_trans_handle *trans;
+	u64 transid;
+	int ret;
+
+	trans = btrfs_start_transaction(root, 0);
+	/* btrfs_start_transaction() reports failure as an ERR_PTR */
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	/* read the id before the handle is handed over to the commit */
+	transid = trans->transid;
+	ret = btrfs_commit_transaction_async(trans, root, 0);
+	/*
+	 * NOTE(review): whether the handle still has to be ended by us when
+	 * the async commit fails is not visible from here - confirm against
+	 * btrfs_commit_transaction_async().
+	 */
+	if (ret)
+		return ret;
+
+	if (argp)
+		if (copy_to_user(argp, &transid, sizeof(transid)))
+			return -EFAULT;
+	return 0;
+}
+
+/*
+ * BTRFS_IOC_WAIT_SYNC: wait for a transaction to be committed. The id is
+ * read from @argp; a NULL @argp means id 0, i.e. the current transaction.
+ */
+static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
+{
+	struct btrfs_root *fs_root = BTRFS_I(file->f_dentry->d_inode)->root;
+	u64 wait_id = 0; /* current trans */
+
+	if (argp && copy_from_user(&wait_id, argp, sizeof(wait_id)))
+		return -EFAULT;
+
+	return btrfs_wait_for_commit(fs_root, wait_id);
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -1998,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
case BTRFS_IOC_SNAP_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 0);
+ return btrfs_ioctl_snap_create(file, argp, 0, 0);
+ case BTRFS_IOC_SNAP_CREATE_ASYNC:
+ return btrfs_ioctl_snap_create(file, argp, 0, 1);
case BTRFS_IOC_SUBVOL_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 1);
+ return btrfs_ioctl_snap_create(file, argp, 1, 0);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SYNC:
btrfs_sync_fs(file->f_dentry->d_sb, 1);
return 0;
+ case BTRFS_IOC_START_SYNC:
+ return btrfs_ioctl_start_sync(file, argp);
+ case BTRFS_IOC_WAIT_SYNC:
+ return btrfs_ioctl_wait_sync(file, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 424694aa517f..17c99ebdf960 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,21 @@
#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_PATH_NAME_MAX 4087
/* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
struct btrfs_ioctl_vol_args {
__s64 fd;
char name[BTRFS_PATH_NAME_MAX + 1];
};
+/*
+ * Argument of BTRFS_IOC_SNAP_CREATE_ASYNC. An 8-byte fd, an 8-byte transid
+ * and a 4080-byte name add up to 4096 bytes.
+ */
+#define BTRFS_SNAPSHOT_NAME_MAX 4079
+struct btrfs_ioctl_async_vol_args {
+ __s64 fd; /* in: descriptor of the file whose root gets snapshotted */
+ __u64 transid; /* out: transaction id, written back by the ioctl */
+ char name[BTRFS_SNAPSHOT_NAME_MAX + 1]; /* in: name; last byte is forced to NUL */
+};
+
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
@@ -178,4 +185,8 @@ struct btrfs_ioctl_space_args {
#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
+ struct btrfs_ioctl_async_vol_args)
#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e56c72bc5add..f4621f6deca1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
u64 end;
u64 orig_end;
- u64 wait_end;
struct btrfs_ordered_extent *ordered;
int found;
@@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
if (orig_end > INT_LIMIT(loff_t))
orig_end = INT_LIMIT(loff_t);
}
- wait_end = orig_end;
again:
/* start IO across the range first to instantiate any delalloc
* extents
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723b9d4a..045c9c2b2d7e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"
+#include "free-space-cache.h"
/*
* backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
u64 search_start;
u64 extents_found;
- int block_rsv_retries;
-
unsigned int stage:8;
unsigned int create_reloc_tree:1;
unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
LIST_HEAD(reloc_roots);
u64 num_bytes = 0;
int ret;
- int retries = 0;
mutex_lock(&root->fs_info->trans_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
if (!err) {
num_bytes = rc->merging_rsv_size;
ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
- num_bytes, &retries);
+ num_bytes);
if (ret)
err = ret;
}
@@ -2155,7 +2153,6 @@ again:
btrfs_end_transaction(trans, rc->extent_root);
btrfs_block_rsv_release(rc->extent_root,
rc->block_rsv, num_bytes);
- retries = 0;
goto again;
}
}
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
num_bytes = calcu_metadata_size(rc, node, 1) * 2;
trans->block_rsv = rc->block_rsv;
- ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
- &rc->block_rsv_retries);
+ ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
if (ret) {
if (ret == -EAGAIN)
rc->commit_transaction = 1;
return ret;
}
- rc->block_rsv_retries = 0;
return 0;
}
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
ret = get_ref_objectid_v0(rc, path, extent_key,
&ref_owner, NULL);
+ if (ret < 0)
+ return ret;
BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
level = (int)ref_owner;
/* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
return ret;
}
+/*
+ * Truncate the free space cache inode of a block group.
+ *
+ * If @inode is NULL, the inode is looked up by number @ino in the tree root;
+ * -ENOENT is returned when it cannot be read or is a bad inode. If @inode is
+ * given, @ino is ignored. Once an inode is in hand, its reference is always
+ * dropped with iput() before returning, on success and on error alike.
+ *
+ * Returns the result of btrfs_truncate_free_space_cache(), -ENOMEM if no
+ * path could be allocated, or the error from btrfs_join_transaction().
+ */
+static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
+ struct inode *inode, u64 ino)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_trans_handle *trans;
+ unsigned long nr;
+ int ret = 0;
+
+ /* caller already looked the inode up, skip the lookup by number */
+ if (inode)
+ goto truncate;
+
+ key.objectid = ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
+ /* a bad inode is still a real reference that must be dropped */
+ if (inode && !IS_ERR(inode))
+ iput(inode);
+ return -ENOENT;
+ }
+
+truncate:
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ trans = btrfs_join_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ /* report the failure instead of returning the initial 0 */
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+
+ btrfs_free_path(path);
+ /* read blocks_used before the handle is released */
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+out:
+ iput(inode);
+ return ret;
+}
+
/*
* helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
* this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
int counted;
int ret;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
ref_root = btrfs_extent_data_ref_root(leaf, ref);
ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
ref_count = btrfs_extent_data_ref_count(leaf, ref);
+ /*
+ * This is an extent belonging to the free space cache, lets just delete
+ * it and redo the search.
+ */
+ if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
+ ret = delete_block_group_cache(rc->extent_root->fs_info,
+ NULL, ref_objectid);
+ if (ret != -ENOENT)
+ return ret;
+ ret = 0;
+ }
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
root = read_fs_root(rc->extent_root->fs_info, ref_root);
if (IS_ERR(root)) {
err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
* is no reservation in transaction handle.
*/
ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
- rc->extent_root->nodesize * 256,
- &rc->block_rsv_retries);
+ rc->extent_root->nodesize * 256);
if (ret)
return ret;
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
rc->extents_found = 0;
rc->nodes_relocated = 0;
rc->merging_rsv_size = 0;
- rc->block_rsv_retries = 0;
rc->create_reloc_tree = 1;
set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
{
struct btrfs_fs_info *fs_info = extent_root->fs_info;
struct reloc_control *rc;
+ struct inode *inode;
+ struct btrfs_path *path;
int ret;
int rw = 0;
int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
rw = 1;
}
+ path = btrfs_alloc_path();
+ if (!path) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
+ path);
+ btrfs_free_path(path);
+
+ if (!IS_ERR(inode))
+ ret = delete_block_group_cache(fs_info, inode, 0);
+ else
+ ret = PTR_ERR(inode);
+
+ if (ret && ret != -ENOENT) {
+ err = ret;
+ goto out;
+ }
+
rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
if (IS_ERR(rc->data_inode)) {
err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
btrfs_add_ordered_sum(inode, ordered, sums);
}
btrfs_put_ordered_extent(ordered);
- return 0;
+ return ret;
}
void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2d958be761c8..6a1086e83ffc 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
{
struct btrfs_root *dead_root;
- struct btrfs_item *item;
struct btrfs_root_item *ri;
struct btrfs_key key;
struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
nritems = btrfs_header_nritems(leaf);
slot = path->slots[0];
}
- item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &key, slot);
if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
goto next;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 144f8a5730f5..8299a25ffc8f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
ret = close_ctree(root);
sb->s_fs_info = NULL;
+
+ (void)ret; /* FIXME: need to fix VFS to return error? */
}
enum {
@@ -68,7 +70,8 @@ enum {
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
- Opt_discard, Opt_err,
+ Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
+ Opt_user_subvol_rm_allowed,
};
static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
{Opt_flushoncommit, "flushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
{Opt_discard, "discard"},
+ {Opt_space_cache, "space_cache"},
+ {Opt_clear_cache, "clear_cache"},
+ {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
{Opt_err, NULL},
};
@@ -235,6 +241,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_discard:
btrfs_set_opt(info->mount_opt, DISCARD);
break;
+ case Opt_space_cache:
+ printk(KERN_INFO "btrfs: enabling disk space caching\n");
+ btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+ /*
+ * Do not fall through: space_cache must not also set
+ * CLEAR_CACHE, or the cache is cleared on every such mount.
+ */
+ break;
+ case Opt_clear_cache:
+ printk(KERN_INFO "btrfs: force clearing of disk cache\n");
+ btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+ break;
+ case Opt_user_subvol_rm_allowed:
+ btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@@ -380,7 +396,7 @@ static struct dentry *get_default_root(struct super_block *sb,
find_root:
new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
if (IS_ERR(new_root))
- return ERR_PTR(PTR_ERR(new_root));
+ return ERR_CAST(new_root);
if (btrfs_root_refs(&new_root->root_item) == 0)
return ERR_PTR(-ENOENT);
@@ -436,7 +452,6 @@ static int btrfs_fill_super(struct super_block *sb,
{
struct inode *inode;
struct dentry *root_dentry;
- struct btrfs_super_block *disk_super;
struct btrfs_root *tree_root;
struct btrfs_key key;
int err;
@@ -458,7 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
return PTR_ERR(tree_root);
}
sb->s_fs_info = tree_root;
- disk_super = &tree_root->fs_info->super_copy;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
@@ -560,8 +574,8 @@ static int btrfs_test_super(struct super_block *s, void *data)
* Note: This is based on get_sb_bdev from fs/super.c with a few additions
* for multiple device setup. Make sure to keep it in sync.
*/
-static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct block_device *bdev = NULL;
struct super_block *s;
@@ -571,7 +585,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0;
- int found = 0;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
@@ -580,7 +593,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
&subvol_name, &subvol_objectid,
&fs_devices);
if (error)
- return error;
+ return ERR_PTR(error);
error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
if (error)
@@ -607,7 +620,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
goto error_close_devices;
}
- found = 1;
btrfs_close_devices(fs_devices);
} else {
char b[BDEVNAME_SIZE];
@@ -629,7 +641,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
if (IS_ERR(root)) {
error = PTR_ERR(root);
deactivate_locked_super(s);
- goto error;
+ goto error_free_subvol_name;
}
/* if they gave us a subvolume name bind mount into that */
if (strcmp(subvol_name, ".")) {
@@ -643,24 +655,21 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
deactivate_locked_super(s);
error = PTR_ERR(new_root);
dput(root);
- goto error_close_devices;
+ goto error_free_subvol_name;
}
if (!new_root->d_inode) {
dput(root);
dput(new_root);
deactivate_locked_super(s);
error = -ENXIO;
- goto error_close_devices;
+ goto error_free_subvol_name;
}
dput(root);
root = new_root;
}
- mnt->mnt_sb = s;
- mnt->mnt_root = root;
-
kfree(subvol_name);
- return 0;
+ return root;
error_s:
error = PTR_ERR(s);
@@ -668,8 +677,7 @@ error_close_devices:
btrfs_close_devices(fs_devices);
error_free_subvol_name:
kfree(subvol_name);
-error:
- return error;
+ return ERR_PTR(error);
}
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -716,18 +724,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct list_head *head = &root->fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
+ u64 total_used_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)root->fs_info->fsid;
rcu_read_lock();
- list_for_each_entry_rcu(found, head, list)
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_SYSTEM))
+ total_used_data += found->disk_total;
+ else
+ total_used_data += found->disk_used;
total_used += found->disk_used;
+ }
rcu_read_unlock();
buf->f_namelen = BTRFS_NAME_LEN;
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bavail = buf->f_bfree;
+ buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;
@@ -746,7 +761,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static struct file_system_type btrfs_fs_type = {
.owner = THIS_MODULE,
.name = "btrfs",
- .get_sb = btrfs_get_sb,
+ .mount = btrfs_mount,
.kill_sb = kill_anon_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63b..1fffbc017bdf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
TRANS_START,
TRANS_JOIN,
TRANS_USERSPACE,
+ TRANS_JOIN_NOLOCK,
};
static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
- int retries = 0;
int ret;
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
return ERR_PTR(-ENOMEM);
- mutex_lock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_lock(&root->fs_info->trans_mutex);
if (may_wait_transaction(root, type))
wait_current_trans(root);
@@ -195,7 +196,8 @@ again:
cur_trans = root->fs_info->running_transaction;
cur_trans->use_count++;
- mutex_unlock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_unlock(&root->fs_info->trans_mutex);
h->transid = cur_trans->transid;
h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
}
if (num_items > 0) {
- ret = btrfs_trans_reserve_metadata(h, root, num_items,
- &retries);
+ ret = btrfs_trans_reserve_metadata(h, root, num_items);
if (ret == -EAGAIN) {
btrfs_commit_transaction(h, root);
goto again;
@@ -224,9 +225,11 @@ again:
}
}
- mutex_lock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_lock(&root->fs_info->trans_mutex);
record_root_in_trans(h, root);
- mutex_unlock(&root->fs_info->trans_mutex);
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_unlock(&root->fs_info->trans_mutex);
if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
return start_transaction(root, 0, TRANS_JOIN);
}
+/*
+ * Join the running transaction through start_transaction() with type
+ * TRANS_JOIN_NOLOCK, which makes start_transaction() skip taking
+ * fs_info->trans_mutex. num_blocks is not used; 0 items are requested.
+ *
+ * NOTE(review): presumably the caller must provide the serialization that
+ * trans_mutex would otherwise give - confirm against the callers.
+ */
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+ int num_blocks)
+{
+ return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
+}
+
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks)
{
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
return 0;
}
+/*
+ * Wait for a transaction commit to finish.
+ *
+ * If transid is non-zero, wait for that transaction: returns 0 right away
+ * when transid <= last_trans_committed, and -EINVAL when no transaction
+ * with that id is found on fs_info->trans_list. If transid is zero, wait
+ * for the newest transaction on the list that has in_commit set; returns 0
+ * without waiting when that transaction already has commit_done set or
+ * when no transaction is in commit.
+ */
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
+{
+ struct btrfs_transaction *cur_trans = NULL, *t;
+ int ret;
+
+ mutex_lock(&root->fs_info->trans_mutex);
+
+ ret = 0;
+ if (transid) {
+ if (transid <= root->fs_info->last_trans_committed)
+ goto out_unlock;
+
+ /* find specified transaction */
+ list_for_each_entry(t, &root->fs_info->trans_list, list) {
+ if (t->transid == transid) {
+ cur_trans = t;
+ break;
+ }
+ /* NOTE(review): early exit assumes ascending transid order */
+ if (t->transid > transid)
+ break;
+ }
+ ret = -EINVAL;
+ if (!cur_trans)
+ goto out_unlock; /* bad transid */
+ } else {
+ /* find newest transaction that is committing | committed */
+ list_for_each_entry_reverse(t, &root->fs_info->trans_list,
+ list) {
+ if (t->in_commit) {
+ if (t->commit_done)
+ goto out_unlock;
+ cur_trans = t;
+ break;
+ }
+ }
+ if (!cur_trans)
+ goto out_unlock; /* nothing committing|committed */
+ }
+
+ /* hold a reference across the wait, trans_mutex is dropped below */
+ cur_trans->use_count++;
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ wait_for_commit(root, cur_trans);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ put_transaction(cur_trans);
+ /* clears the -EINVAL left over from the transid lookup path */
+ ret = 0;
+out_unlock:
+ mutex_unlock(&root->fs_info->trans_mutex);
+ return ret;
+}
+
#if 0
/*
* rate limit against the drop_snapshot code. This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
}
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int throttle)
+ struct btrfs_root *root, int throttle, int lock)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
- if (!root->fs_info->open_ioctl_trans &&
+ if (lock && !root->fs_info->open_ioctl_trans &&
should_end_transaction(trans, root))
trans->transaction->blocked = 1;
- if (cur_trans->blocked && !cur_trans->in_commit) {
+ if (lock && cur_trans->blocked && !cur_trans->in_commit) {
if (throttle)
return btrfs_commit_transaction(trans, root);
else
wake_up_process(info->transaction_kthread);
}
- mutex_lock(&info->trans_mutex);
+ if (lock)
+ mutex_lock(&info->trans_mutex);
WARN_ON(cur_trans != info->running_transaction);
WARN_ON(cur_trans->num_writers < 1);
cur_trans->num_writers--;
+ smp_mb();
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
put_transaction(cur_trans);
- mutex_unlock(&info->trans_mutex);
+ if (lock)
+ mutex_unlock(&info->trans_mutex);
if (current->journal_info == trans)
current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- return __btrfs_end_transaction(trans, root, 0);
+ return __btrfs_end_transaction(trans, root, 0, 1);
}
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- return __btrfs_end_transaction(trans, root, 1);
+ return __btrfs_end_transaction(trans, root, 1, 1);
+}
+
+/*
+ * End a transaction handle by calling __btrfs_end_transaction() with
+ * throttle = 0 and lock = 0. With lock = 0 that helper does not take
+ * trans_mutex and skips the "blocked" / commit kick-off checks.
+ */
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ return __btrfs_end_transaction(trans, root, 0, 0);
}
/*
@@ -836,7 +906,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct extent_buffer *tmp;
struct extent_buffer *old;
int ret;
- int retries = 0;
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
@@ -858,7 +927,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (to_reserve > 0) {
ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
- to_reserve, &retries);
+ to_reserve);
if (ret) {
pending->error = ret;
goto fail;
@@ -966,6 +1035,8 @@ static void update_super_roots(struct btrfs_root *root)
super->root = root_item->bytenr;
super->generation = root_item->generation;
super->root_level = root_item->level;
+ if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
+ super->cache_generation = root_item->generation;
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1059,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
return ret;
}
+/*
+ * wait for the current transaction commit to start and block subsequent
+ * transaction joins
+ *
+ * Returns once trans->in_commit is set. The loop drops fs_info->trans_mutex
+ * around schedule() and retakes it afterwards, so the caller is expected to
+ * hold trans_mutex on entry.
+ */
+static void wait_current_trans_commit_start(struct btrfs_root *root,
+ struct btrfs_transaction *trans)
+{
+ DEFINE_WAIT(wait);
+
+ /* fast path: commit already started, nothing to wait for */
+ if (trans->in_commit)
+ return;
+
+ while (1) {
+ /* queue ourselves before re-checking so a wake-up is not missed */
+ prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (trans->in_commit) {
+ finish_wait(&root->fs_info->transaction_blocked_wait,
+ &wait);
+ break;
+ }
+ /* sleep without trans_mutex held */
+ mutex_unlock(&root->fs_info->trans_mutex);
+ schedule();
+ mutex_lock(&root->fs_info->trans_mutex);
+ finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
+ }
+}
+
+/*
+ * wait for the current transaction to start and then become unblocked.
+ * caller holds ref.
+ *
+ * Returns once trans->commit_done is set, or trans->in_commit is set while
+ * trans->blocked is clear. The loop drops fs_info->trans_mutex around
+ * schedule() and retakes it afterwards, so the caller is expected to hold
+ * trans_mutex on entry.
+ */
+static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
+ struct btrfs_transaction *trans)
+{
+ DEFINE_WAIT(wait);
+
+ /* fast path: condition already met, nothing to wait for */
+ if (trans->commit_done || (trans->in_commit && !trans->blocked))
+ return;
+
+ while (1) {
+ /* queue ourselves before re-checking so a wake-up is not missed */
+ prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (trans->commit_done ||
+ (trans->in_commit && !trans->blocked)) {
+ finish_wait(&root->fs_info->transaction_wait,
+ &wait);
+ break;
+ }
+ /* sleep without trans_mutex held */
+ mutex_unlock(&root->fs_info->trans_mutex);
+ schedule();
+ mutex_lock(&root->fs_info->trans_mutex);
+ finish_wait(&root->fs_info->transaction_wait,
+ &wait);
+ }
+}
+
+/*
+ * commit transactions asynchronously. once btrfs_commit_transaction_async
+ * returns, any subsequent transaction will not be allowed to join.
+ *
+ * One of these is allocated per async commit request by
+ * btrfs_commit_transaction_async() and freed by do_async_commit().
+ */
+struct btrfs_async_commit {
+ /* handle joined by the requester; committed by the work item */
+ struct btrfs_trans_handle *newtrans;
+ /* root passed to btrfs_commit_transaction() by the work item */
+ struct btrfs_root *root;
+ /* delayed work whose handler is do_async_commit() */
+ struct delayed_work work;
+};
+
+/*
+ * Work handler for an async commit: commits ac->newtrans on ac->root and
+ * then frees the btrfs_async_commit. The return value of
+ * btrfs_commit_transaction() is not checked.
+ */
+static void do_async_commit(struct work_struct *work)
+{
+ /* work is the work_struct embedded in the delayed_work member */
+ struct btrfs_async_commit *ac =
+ container_of(work, struct btrfs_async_commit, work.work);
+
+ btrfs_commit_transaction(ac->newtrans, ac->root);
+ kfree(ac);
+}
+
+/*
+ * Hand the commit of trans's transaction to a work item and wait only until
+ * the commit has progressed far enough:
+ * - wait_for_unblock == 0: until the transaction has in_commit set;
+ * - wait_for_unblock != 0: until it has commit_done set, or has in_commit
+ * set with blocked clear.
+ *
+ * The caller's handle @trans is ended here; a second handle joined here is
+ * what the work item commits. Always returns 0; allocation failure hits
+ * BUG_ON().
+ *
+ * NOTE(review): the result of btrfs_join_transaction() is stored without an
+ * IS_ERR() check before do_async_commit() passes it to
+ * btrfs_commit_transaction() - confirm it cannot fail here or add handling.
+ */
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ int wait_for_unblock)
+{
+ struct btrfs_async_commit *ac;
+ struct btrfs_transaction *cur_trans;
+
+ ac = kmalloc(sizeof(*ac), GFP_NOFS);
+ BUG_ON(!ac);
+
+ INIT_DELAYED_WORK(&ac->work, do_async_commit);
+ ac->root = root;
+ ac->newtrans = btrfs_join_transaction(root, 0);
+
+ /* take transaction reference */
+ mutex_lock(&root->fs_info->trans_mutex);
+ cur_trans = trans->transaction;
+ cur_trans->use_count++;
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ /* drop the caller's handle; ac->newtrans is the one that gets committed */
+ btrfs_end_transaction(trans, root);
+ schedule_delayed_work(&ac->work, 0);
+
+ /* wait for transaction to start and unblock */
+ mutex_lock(&root->fs_info->trans_mutex);
+ if (wait_for_unblock)
+ wait_current_trans_commit_start_and_unblock(root, cur_trans);
+ else
+ wait_current_trans_commit_start(root, cur_trans);
+ /* release the reference taken on cur_trans above */
+ put_transaction(cur_trans);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ return 0;
+}
+
+/*
+ * btrfs_transaction state sequence:
+ * in_commit = 0, blocked = 0 (initial)
+ * in_commit = 1, blocked = 1
+ * blocked = 0
+ * commit_done = 1
+ */
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
unsigned long joined = 0;
- unsigned long timeout = 1;
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL;
DEFINE_WAIT(wait);
@@ -1039,6 +1226,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
trans->transaction->in_commit = 1;
trans->transaction->blocked = 1;
+ wake_up(&root->fs_info->transaction_blocked_wait);
+
if (cur_trans->list.prev != &root->fs_info->trans_list) {
prev_trans = list_entry(cur_trans->list.prev,
struct btrfs_transaction, list);
@@ -1063,11 +1252,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
snap_pending = 1;
WARN_ON(cur_trans != trans->transaction);
- if (cur_trans->num_writers > 1)
- timeout = MAX_SCHEDULE_TIMEOUT;
- else if (should_grow)
- timeout = 1;
-
mutex_unlock(&root->fs_info->trans_mutex);
if (flush_on_commit || snap_pending) {
@@ -1089,8 +1273,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
TASK_UNINTERRUPTIBLE);
smp_mb();
- if (cur_trans->num_writers > 1 || should_grow)
- schedule_timeout(timeout);
+ if (cur_trans->num_writers > 1)
+ schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+ else if (should_grow)
+ schedule_timeout(1);
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986d0bfd..f104b57ad4ef 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+ int num_blocks);
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks);
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ int wait_for_unblock);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f7ac8e013ed7..992ab425599d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int ret = 0;
int wret;
int level;
- int orig_level;
int is_extent = 0;
int next_key_ret = 0;
u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
return -ENOMEM;
level = btrfs_header_level(root->node);
- orig_level = level;
if (level == 0)
goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb102a9aee9c..a29f19384a27 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
{
struct inode *dir;
int ret;
- struct btrfs_key location;
struct btrfs_inode_ref *ref;
struct btrfs_dir_item *di;
struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
unsigned long ref_ptr;
unsigned long ref_end;
- location.objectid = key->objectid;
- location.type = BTRFS_INODE_ITEM_KEY;
- location.offset = 0;
-
/*
* it is possible that we didn't log all the parent directories
* for a given inode. If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_path *path;
struct btrfs_root *root = wc->replay_dest;
struct btrfs_key key;
- u32 item_size;
int level;
int i;
int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
- item_size = btrfs_item_size_nr(eb, i);
/* inode keys are done during the first stage */
if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
u64 root_owner;
- u64 root_gen;
u64 bytenr;
u64 ptr_gen;
struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level];
root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
struct walk_control *wc)
{
u64 root_owner;
- u64 root_gen;
int i;
int slot;
int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
slot = path->slots[i];
if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
- struct extent_buffer *node;
- node = path->nodes[i];
path->slots[i]++;
*level = i;
WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level + 1];
root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
}
btrfs_end_log_trans(root);
- return 0;
+ return err;
}
/* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src = NULL;
- u32 size;
int err = 0;
int ret;
int nritems;
@@ -2793,7 +2779,6 @@ again:
break;
src = path->nodes[0];
- size = btrfs_item_size_nr(src, path->slots[0]);
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
ins_nr++;
goto next_slot;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e25e46a8b4e2..cc04dc1445d6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1898,7 +1898,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
u64 size_to_free;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_chunk *chunk;
struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
struct btrfs_trans_handle *trans;
struct btrfs_key found_key;
@@ -1962,9 +1961,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
if (found_key.objectid != key.objectid)
break;
- chunk = btrfs_item_ptr(path->nodes[0],
- path->slots[0],
- struct btrfs_chunk);
/* chunk zero is special */
if (found_key.offset == 0)
break;
@@ -3031,8 +3027,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
}
bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
dev = multi->stripes[dev_nr].dev;
- BUG_ON(rw == WRITE && !dev->writeable);
- if (dev && dev->bdev) {
+ if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
bio->bi_bdev = dev->bdev;
if (async_submit)
schedule_bio(root, dev, rw, bio);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 88ecbb215878..698fdd2c739c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
- struct btrfs_item *item;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
advance = 1;
- item = btrfs_item_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* check to make sure this item is what we want */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 3e2b90eaa239..b9cd5445f71c 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
int nr_pages = 0;
struct page *in_page = NULL;
struct page *out_page = NULL;
- int out_written = 0;
- int in_read = 0;
unsigned long bytes_left;
*out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
- out_written = 0;
- in_read = 0;
-
while (workspace->def_strm.total_in < len) {
ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
if (ret != Z_OK) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d6e0e0421891..08b460ae0539 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -635,7 +635,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
/*
* mount: join the ceph cluster, and open root directory.
*/
-static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
+static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
const char *path)
{
int err;
@@ -678,16 +678,14 @@ static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
}
}
- mnt->mnt_root = root;
- mnt->mnt_sb = fsc->sb;
-
fsc->mount_state = CEPH_MOUNT_MOUNTED;
dout("mount success\n");
- err = 0;
+ mutex_unlock(&fsc->client->mount_mutex);
+ return root;
out:
mutex_unlock(&fsc->client->mount_mutex);
- return err;
+ return ERR_PTR(err);
fail:
if (first) {
@@ -777,41 +775,45 @@ static int ceph_register_bdi(struct super_block *sb,
return err;
}
-static int ceph_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *ceph_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
struct super_block *sb;
struct ceph_fs_client *fsc;
+ struct dentry *res;
int err;
int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
const char *path = NULL;
struct ceph_mount_options *fsopt = NULL;
struct ceph_options *opt = NULL;
- dout("ceph_get_sb\n");
+ dout("ceph_mount\n");
err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
- if (err < 0)
+ if (err < 0) {
+ res = ERR_PTR(err);
goto out_final;
+ }
/* create client (which we may/may not use) */
fsc = create_fs_client(fsopt, opt);
if (IS_ERR(fsc)) {
- err = PTR_ERR(fsc);
+ res = ERR_CAST(fsc);
kfree(fsopt);
kfree(opt);
goto out_final;
}
err = ceph_mdsc_init(fsc);
- if (err < 0)
+ if (err < 0) {
+ res = ERR_PTR(err);
goto out;
+ }
if (ceph_test_opt(fsc->client, NOSHARE))
compare_super = NULL;
sb = sget(fs_type, compare_super, ceph_set_super, fsc);
if (IS_ERR(sb)) {
- err = PTR_ERR(sb);
+ res = ERR_CAST(sb);
goto out;
}
@@ -823,16 +825,18 @@ static int ceph_get_sb(struct file_system_type *fs_type,
} else {
dout("get_sb using new client %p\n", fsc);
err = ceph_register_bdi(sb, fsc);
- if (err < 0)
+ if (err < 0) {
+ res = ERR_PTR(err);
goto out_splat;
+ }
}
- err = ceph_mount(fsc, mnt, path);
- if (err < 0)
+ res = ceph_real_mount(fsc, path);
+ if (IS_ERR(res))
goto out_splat;
- dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
- mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode));
- return 0;
+ dout("root %p inode %p ino %llx.%llx\n", res,
+ res->d_inode, ceph_vinop(res->d_inode));
+ return res;
out_splat:
ceph_mdsc_close_sessions(fsc->mdsc);
@@ -843,8 +847,8 @@ out:
ceph_mdsc_destroy(fsc);
destroy_fs_client(fsc);
out_final:
- dout("ceph_get_sb fail %d\n", err);
- return err;
+ dout("ceph_mount fail %ld\n", PTR_ERR(res));
+ return res;
}
static void ceph_kill_sb(struct super_block *s)
@@ -860,7 +864,7 @@ static void ceph_kill_sb(struct super_block *s)
static struct file_system_type ceph_fs_type = {
.owner = THIS_MODULE,
.name = "ceph",
- .get_sb = ceph_get_sb,
+ .mount = ceph_mount,
.kill_sb = ceph_kill_sb,
.fs_flags = FS_RENAME_DOES_D_MOVE,
};
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 917b7d449bb2..0ed213970ced 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,9 @@ config CIFS
tristate "CIFS support (advanced network filesystem, SMBFS successor)"
depends on INET
select NLS
+ select CRYPTO
+ select CRYPTO_MD5
+ select CRYPTO_ARC4
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 5aff46c61e52..355abcdcda98 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -81,7 +81,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for
v) mount check for unmatched uids
-w) Add support for new vfs entry points for setlease and fallocate
+w) Add support for new vfs entry point for fallocate
x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of
processes can proceed better in parallel (on the server)
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 525ba59a4105..e9a393c9c2ca 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -15,7 +15,7 @@
* the GNU Lesser General Public License for more details.
*
*/
-#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
#ifndef _CIFS_FS_SB_H
#define _CIFS_FS_SB_H
@@ -42,9 +42,9 @@
#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
struct cifs_sb_info {
- struct radix_tree_root tlink_tree;
-#define CIFS_TLINK_MASTER_TAG 0 /* is "master" (mount) tcon */
+ struct rb_root tlink_tree;
spinlock_t tlink_tree_lock;
+ struct tcon_link *master_tlink;
struct nls_table *local_nls;
unsigned int rsize;
unsigned int wsize;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7ac0056294cf..f856732161ab 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,18 +43,32 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
unsigned char *p24);
static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
- const struct session_key *key, char *signature)
+ struct TCP_Server_Info *server, char *signature)
{
- struct MD5Context context;
+ int rc;
- if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
+ if (cifs_pdu == NULL || signature == NULL || server == NULL)
return -EINVAL;
- cifs_MD5_init(&context);
- cifs_MD5_update(&context, (char *)&key->data, key->len);
- cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+ if (!server->secmech.sdescmd5) {
+ cERROR(1, "%s: Can't generate signature\n", __func__);
+ return -1;
+ }
+
+ rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+ if (rc) {
+ cERROR(1, "%s: Oould not init md5\n", __func__);
+ return rc;
+ }
+
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ server->session_key.response, server->session_key.len);
+
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+
+ rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
- cifs_MD5_final(signature, &context);
return 0;
}
@@ -79,8 +93,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
server->sequence_number++;
spin_unlock(&GlobalMid_Lock);
- rc = cifs_calculate_signature(cifs_pdu, &server->session_key,
- smb_signature);
+ rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
if (rc)
memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
else
@@ -90,16 +103,28 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
}
static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
- const struct session_key *key, char *signature)
+ struct TCP_Server_Info *server, char *signature)
{
- struct MD5Context context;
int i;
+ int rc;
- if ((iov == NULL) || (signature == NULL) || (key == NULL))
+ if (iov == NULL || signature == NULL || server == NULL)
return -EINVAL;
- cifs_MD5_init(&context);
- cifs_MD5_update(&context, (char *)&key->data, key->len);
+ if (!server->secmech.sdescmd5) {
+ cERROR(1, "%s: Can't generate signature\n", __func__);
+ return -1;
+ }
+
+ rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+ if (rc) {
+ cERROR(1, "%s: Oould not init md5\n", __func__);
+ return rc;
+ }
+
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ server->session_key.response, server->session_key.len);
+
for (i = 0; i < n_vec; i++) {
if (iov[i].iov_len == 0)
continue;
@@ -112,18 +137,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
if (i == 0) {
if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
break; /* nothing to sign or corrupt header */
- cifs_MD5_update(&context, iov[0].iov_base+4,
- iov[0].iov_len-4);
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ iov[i].iov_base + 4, iov[i].iov_len - 4);
} else
- cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
+ crypto_shash_update(&server->secmech.sdescmd5->shash,
+ iov[i].iov_base, iov[i].iov_len);
}
- cifs_MD5_final(signature, &context);
+ rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
- return 0;
+ return rc;
}
-
int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number)
{
@@ -146,8 +171,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
server->sequence_number++;
spin_unlock(&GlobalMid_Lock);
- rc = cifs_calc_signature2(iov, n_vec, &server->session_key,
- smb_signature);
+ rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
if (rc)
memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
else
@@ -157,14 +181,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
}
int cifs_verify_signature(struct smb_hdr *cifs_pdu,
- const struct session_key *session_key,
+ struct TCP_Server_Info *server,
__u32 expected_sequence_number)
{
unsigned int rc;
char server_response_sig[8];
char what_we_think_sig_should_be[20];
- if (cifs_pdu == NULL || session_key == NULL)
+ if (cifs_pdu == NULL || server == NULL)
return -EINVAL;
if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -193,7 +217,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
cpu_to_le32(expected_sequence_number);
cifs_pdu->Signature.Sequence.Reserved = 0;
- rc = cifs_calculate_signature(cifs_pdu, session_key,
+ rc = cifs_calculate_signature(cifs_pdu, server,
what_we_think_sig_should_be);
if (rc)
@@ -209,18 +233,28 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
}
-/* We fill in key by putting in 40 byte array which was allocated by caller */
-int cifs_calculate_session_key(struct session_key *key, const char *rn,
- const char *password)
+/* first calculate 24 bytes ntlm response and then 16 byte session key */
+int setup_ntlm_response(struct cifsSesInfo *ses)
{
- char temp_key[16];
- if ((key == NULL) || (rn == NULL))
+ unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
+ char temp_key[CIFS_SESS_KEY_SIZE];
+
+ if (!ses)
return -EINVAL;
- E_md4hash(password, temp_key);
- mdfour(key->data.ntlm, temp_key, 16);
- memcpy(key->data.ntlm+16, rn, CIFS_SESS_KEY_SIZE);
- key->len = 40;
+ ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len);
+ return -ENOMEM;
+ }
+ ses->auth_key.len = temp_len;
+
+ SMBNTencrypt(ses->password, ses->server->cryptkey,
+ ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+
+ E_md4hash(ses->password, temp_key);
+ mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
+
return 0;
}
@@ -294,15 +328,15 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
* two times the unicode length of a server name +
* size of a timestamp (which is 8 bytes).
*/
- ses->tilen = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
- ses->tiblob = kzalloc(ses->tilen, GFP_KERNEL);
- if (!ses->tiblob) {
- ses->tilen = 0;
+ ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
+ ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ ses->auth_key.len = 0;
cERROR(1, "Challenge target info allocation failure");
return -ENOMEM;
}
- blobptr = ses->tiblob;
+ blobptr = ses->auth_key.response;
attrptr = (struct ntlmssp2_name *) blobptr;
attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME);
@@ -357,7 +391,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
* about target string i.e. for some, just user name might suffice.
*/
static int
-find_domain_name(struct cifsSesInfo *ses)
+find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
{
unsigned int attrsize;
unsigned int type;
@@ -366,11 +400,11 @@ find_domain_name(struct cifsSesInfo *ses)
unsigned char *blobend;
struct ntlmssp2_name *attrptr;
- if (!ses->tilen || !ses->tiblob)
+ if (!ses->auth_key.len || !ses->auth_key.response)
return 0;
- blobptr = ses->tiblob;
- blobend = ses->tiblob + ses->tilen;
+ blobptr = ses->auth_key.response;
+ blobend = blobptr + ses->auth_key.len;
while (blobptr + onesize < blobend) {
attrptr = (struct ntlmssp2_name *) blobptr;
@@ -386,16 +420,13 @@ find_domain_name(struct cifsSesInfo *ses)
if (!attrsize)
break;
if (!ses->domainName) {
- struct nls_table *default_nls;
ses->domainName =
kmalloc(attrsize + 1, GFP_KERNEL);
if (!ses->domainName)
return -ENOMEM;
- default_nls = load_nls_default();
cifs_from_ucs2(ses->domainName,
(__le16 *)blobptr, attrsize, attrsize,
- default_nls, false);
- unload_nls(default_nls);
+ nls_cp, false);
break;
}
}
@@ -405,82 +436,136 @@ find_domain_name(struct cifsSesInfo *ses)
return 0;
}
-static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
+static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
const struct nls_table *nls_cp)
{
int rc = 0;
int len;
- char nt_hash[16];
- struct HMACMD5Context *pctxt;
+ char nt_hash[CIFS_NTHASH_SIZE];
wchar_t *user;
wchar_t *domain;
+ wchar_t *server;
- pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
-
- if (pctxt == NULL)
- return -ENOMEM;
+ if (!ses->server->secmech.sdeschmacmd5) {
+ cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+ return -1;
+ }
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash);
- /* convert Domainname to unicode and uppercase */
- hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
+ crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
+ CIFS_NTHASH_SIZE);
+
+ rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
+ return rc;
+ }
/* convert ses->userName to unicode and uppercase */
len = strlen(ses->userName);
user = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (user == NULL)
+ if (user == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
+ rc = -ENOMEM;
goto calc_exit_2;
+ }
len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
UniStrupr(user);
- hmac_md5_update((char *)user, 2*len, pctxt);
+
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ (char *)user, 2 * len);
/* convert ses->domainName to unicode and uppercase */
if (ses->domainName) {
len = strlen(ses->domainName);
domain = kmalloc(2 + (len * 2), GFP_KERNEL);
- if (domain == NULL)
+ if (domain == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
+ rc = -ENOMEM;
goto calc_exit_1;
+ }
len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
nls_cp);
- /* the following line was removed since it didn't work well
- with lower cased domain name that passed as an option.
- Maybe converting the domain name earlier makes sense */
- /* UniStrupr(domain); */
-
- hmac_md5_update((char *)domain, 2*len, pctxt);
-
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ (char *)domain, 2 * len);
kfree(domain);
+ } else if (ses->serverName) {
+ len = strlen(ses->serverName);
+
+ server = kmalloc(2 + (len * 2), GFP_KERNEL);
+ if (server == NULL) {
+ cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
+ rc = -ENOMEM;
+ goto calc_exit_1;
+ }
+ len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
+ nls_cp);
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ (char *)server, 2 * len);
+ kfree(server);
}
+
+ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ ntlmv2_hash);
+
calc_exit_1:
kfree(user);
calc_exit_2:
- /* BB FIXME what about bytes 24 through 40 of the signing key?
- compare with the NTLM example */
- hmac_md5_final(ses->ntlmv2_hash, pctxt);
+ return rc;
+}
+
+static int
+CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
+{
+ int rc;
+ unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
+
+ if (!ses->server->secmech.sdeschmacmd5) {
+ cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+ return -1;
+ }
+
+ crypto_shash_setkey(ses->server->secmech.hmacmd5,
+ ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+
+ rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
+ return rc;
+ }
+
+ if (ses->server->secType == RawNTLMSSP)
+ memcpy(ses->auth_key.response + offset,
+ ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ else
+ memcpy(ses->auth_key.response + offset,
+ ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ ses->auth_key.response + offset, ses->auth_key.len - offset);
+
+ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ ses->auth_key.response + CIFS_SESS_KEY_SIZE);
- kfree(pctxt);
return rc;
}
+
int
-setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
- const struct nls_table *nls_cp)
+setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
{
int rc;
- struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
- struct HMACMD5Context context;
-
- buf->blob_signature = cpu_to_le32(0x00000101);
- buf->reserved = 0;
- buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
- get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
- buf->reserved2 = 0;
+ int baselen;
+ unsigned int tilen;
+ struct ntlmv2_resp *buf;
+ char ntlmv2_hash[16];
+ unsigned char *tiblob = NULL; /* target info blob */
if (ses->server->secType == RawNTLMSSP) {
if (!ses->domainName) {
- rc = find_domain_name(ses);
+ rc = find_domain_name(ses, nls_cp);
if (rc) {
cERROR(1, "error %d finding domain name", rc);
goto setup_ntlmv2_rsp_ret;
@@ -490,51 +575,179 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
rc = build_avpair_blob(ses, nls_cp);
if (rc) {
cERROR(1, "error %d building av pair blob", rc);
- return rc;
+ goto setup_ntlmv2_rsp_ret;
}
}
- /* calculate buf->ntlmv2_hash */
- rc = calc_ntlmv2_hash(ses, nls_cp);
+ baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
+ tilen = ses->auth_key.len;
+ tiblob = ses->auth_key.response;
+
+ ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ rc = ENOMEM;
+ ses->auth_key.len = 0;
+ cERROR(1, "%s: Can't allocate auth blob", __func__);
+ goto setup_ntlmv2_rsp_ret;
+ }
+ ses->auth_key.len += baselen;
+
+ buf = (struct ntlmv2_resp *)
+ (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+ buf->blob_signature = cpu_to_le32(0x00000101);
+ buf->reserved = 0;
+ buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+ get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
+ buf->reserved2 = 0;
+
+ memcpy(ses->auth_key.response + baselen, tiblob, tilen);
+
+ /* calculate ntlmv2_hash */
+ rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
if (rc) {
cERROR(1, "could not get v2 hash rc %d", rc);
goto setup_ntlmv2_rsp_ret;
}
- CalcNTLMv2_response(ses, resp_buf);
+
+ /* calculate first part of the client response (CR1) */
+ rc = CalcNTLMv2_response(ses, ntlmv2_hash);
+ if (rc) {
+ cERROR(1, "Could not calculate CR1 rc: %d", rc);
+ goto setup_ntlmv2_rsp_ret;
+ }
/* now calculate the session key for NTLMv2 */
- hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context);
- hmac_md5_update(resp_buf, 16, &context);
- hmac_md5_final(ses->auth_key.data.ntlmv2.key, &context);
+ crypto_shash_setkey(ses->server->secmech.hmacmd5,
+ ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+
+ rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+ if (rc) {
+ cERROR(1, "%s: Could not init hmacmd5\n", __func__);
+ goto setup_ntlmv2_rsp_ret;
+ }
- memcpy(&ses->auth_key.data.ntlmv2.resp, resp_buf,
- sizeof(struct ntlmv2_resp));
- ses->auth_key.len = 16 + sizeof(struct ntlmv2_resp);
+ crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+ ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_HMAC_MD5_HASH_SIZE);
- return 0;
+ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+ ses->auth_key.response);
setup_ntlmv2_rsp_ret:
- kfree(ses->tiblob);
- ses->tiblob = NULL;
- ses->tilen = 0;
+ kfree(tiblob);
return rc;
}
-void CalcNTLMv2_response(const struct cifsSesInfo *ses,
- char *v2_session_response)
+int
+calc_seckey(struct cifsSesInfo *ses)
{
- struct HMACMD5Context context;
- /* rest of v2 struct already generated */
- memcpy(v2_session_response + 8, ses->cryptKey, 8);
- hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context);
+ int rc;
+ struct crypto_blkcipher *tfm_arc4;
+ struct scatterlist sgin, sgout;
+ struct blkcipher_desc desc;
+ unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
+
+ get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
+
+ tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
+ cERROR(1, "could not allocate crypto API arc4\n");
+ return PTR_ERR(tfm_arc4);
+ }
- hmac_md5_update(v2_session_response+8,
- sizeof(struct ntlmv2_resp) - 8, &context);
+ desc.tfm = tfm_arc4;
- if (ses->tilen)
- hmac_md5_update(ses->tiblob, ses->tilen, &context);
+ crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
+ CIFS_SESS_KEY_SIZE);
- hmac_md5_final(v2_session_response, &context);
-/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
+ sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
+ sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+
+ rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
+ if (rc) {
+ cERROR(1, "could not encrypt session key rc: %d\n", rc);
+ crypto_free_blkcipher(tfm_arc4);
+ return rc;
+ }
+
+ /* make secondary_key/nonce as session key */
+ memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
+ /* and make len as that of session key only */
+ ses->auth_key.len = CIFS_SESS_KEY_SIZE;
+
+ crypto_free_blkcipher(tfm_arc4);
+
+ return 0;
+}
+
+void
+cifs_crypto_shash_release(struct TCP_Server_Info *server)
+{
+ if (server->secmech.md5)
+ crypto_free_shash(server->secmech.md5);
+
+ if (server->secmech.hmacmd5)
+ crypto_free_shash(server->secmech.hmacmd5);
+
+ kfree(server->secmech.sdeschmacmd5);
+
+ kfree(server->secmech.sdescmd5);
+}
+
+int
+cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
+{
+ int rc;
+ unsigned int size;
+
+ server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
+ if (!server->secmech.hmacmd5 ||
+ IS_ERR(server->secmech.hmacmd5)) {
+ cERROR(1, "could not allocate crypto hmacmd5\n");
+ return PTR_ERR(server->secmech.hmacmd5);
+ }
+
+ server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
+ if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
+ cERROR(1, "could not allocate crypto md5\n");
+ rc = PTR_ERR(server->secmech.md5);
+ goto crypto_allocate_md5_fail;
+ }
+
+ size = sizeof(struct shash_desc) +
+ crypto_shash_descsize(server->secmech.hmacmd5);
+ server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
+ if (!server->secmech.sdeschmacmd5) {
+ cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
+ rc = -ENOMEM;
+ goto crypto_allocate_hmacmd5_sdesc_fail;
+ }
+ server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
+ server->secmech.sdeschmacmd5->shash.flags = 0x0;
+
+
+ size = sizeof(struct shash_desc) +
+ crypto_shash_descsize(server->secmech.md5);
+ server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
+ if (!server->secmech.sdescmd5) {
+ cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
+ rc = -ENOMEM;
+ goto crypto_allocate_md5_sdesc_fail;
+ }
+ server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
+ server->secmech.sdescmd5->shash.flags = 0x0;
+
+ return 0;
+
+crypto_allocate_md5_sdesc_fail:
+ kfree(server->secmech.sdeschmacmd5);
+
+crypto_allocate_hmacmd5_sdesc_fail:
+ crypto_free_shash(server->secmech.md5);
+
+crypto_allocate_md5_fail:
+ crypto_free_shash(server->secmech.hmacmd5);
+
+ return rc;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 34371637f210..9c3789762ab7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -116,7 +116,7 @@ cifs_read_super(struct super_block *sb, void *data,
return -ENOMEM;
spin_lock_init(&cifs_sb->tlink_tree_lock);
- INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL);
+ cifs_sb->tlink_tree = RB_ROOT;
rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
if (rc) {
@@ -318,12 +318,10 @@ cifs_alloc_inode(struct super_block *sb)
return NULL;
cifs_inode->cifsAttrs = 0x20; /* default */
cifs_inode->time = 0;
- cifs_inode->write_behind_rc = 0;
/* Until the file is open and we have gotten oplock
info back from the server, can not assume caching of
file data or metadata */
- cifs_inode->clientCanCacheRead = false;
- cifs_inode->clientCanCacheAll = false;
+ cifs_set_oplock_level(cifs_inode, 0);
cifs_inode->delete_pending = false;
cifs_inode->invalid_mapping = false;
cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
@@ -545,9 +543,9 @@ static const struct super_operations cifs_super_ops = {
#endif
};
-static int
-cifs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *
+cifs_do_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
int rc;
struct super_block *sb;
@@ -557,18 +555,17 @@ cifs_get_sb(struct file_system_type *fs_type,
cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
if (IS_ERR(sb))
- return PTR_ERR(sb);
+ return ERR_CAST(sb);
sb->s_flags = flags;
rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
if (rc) {
deactivate_locked_super(sb);
- return rc;
+ return ERR_PTR(rc);
}
sb->s_flags |= MS_ACTIVE;
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
}
static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -634,7 +631,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
struct file_system_type cifs_fs_type = {
.owner = THIS_MODULE,
.name = "cifs",
- .get_sb = cifs_get_sb,
+ .mount = cifs_do_mount,
.kill_sb = kill_anon_super,
/* .fs_flags */
};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f35795a16b42..897b2b2b28b5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -112,5 +112,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.67"
+#define CIFS_VERSION "1.68"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3365e77f6f24..b577bf0a1bb3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,6 +25,9 @@
#include <linux/workqueue.h>
#include "cifs_fs_sb.h"
#include "cifsacl.h"
+#include <crypto/internal/hash.h>
+#include <linux/scatterlist.h>
+
/*
* The sizes of various internal tables and strings
*/
@@ -74,7 +77,7 @@
* CIFS vfs client Status information (based on what we know.)
*/
- /* associated with each tcp and smb session */
+/* associated with each tcp and smb session */
enum statusEnum {
CifsNew = 0,
CifsGood,
@@ -99,14 +102,29 @@ enum protocolEnum {
struct session_key {
unsigned int len;
- union {
- char ntlm[CIFS_SESS_KEY_SIZE + 16];
- char krb5[CIFS_SESS_KEY_SIZE + 16]; /* BB: length correct? */
- struct {
- char key[16];
- struct ntlmv2_resp resp;
- } ntlmv2;
- } data;
+ char *response;
+};
+
+/* crypto security descriptor definition */
+struct sdesc {
+ struct shash_desc shash;
+ char ctx[];
+};
+
+/* crypto hashing related structure/fields, not specific to a sec mech */
+struct cifs_secmech {
+ struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
+ struct crypto_shash *md5; /* md5 hash function */
+ struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */
+ struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
+};
+
+/* per smb session structure/fields */
+struct ntlmssp_auth {
+ __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */
+ __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
+ unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
+ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */
};
struct cifs_cred {
@@ -179,12 +197,14 @@ struct TCP_Server_Info {
int capabilities; /* allow selective disabling of caps by smb sess */
int timeAdj; /* Adjust for difference in server time zone in sec */
__u16 CurrentMid; /* multiplex id - rotating counter */
+ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
/* 16th byte of RFC1001 workstation name is always null */
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
__u32 sequence_number; /* needed for CIFS PDU signature */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
u16 dialect; /* dialect index that server chose */
+ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
/* extended security flavors that server supports */
bool sec_kerberos; /* supports plain Kerberos */
bool sec_mskerberos; /* supports legacy MS Kerberos */
@@ -222,11 +242,8 @@ struct cifsSesInfo {
char userName[MAX_USERNAME_SIZE + 1];
char *domainName;
char *password;
- char cryptKey[CIFS_CRYPTO_KEY_SIZE];
struct session_key auth_key;
- char ntlmv2_hash[16];
- unsigned int tilen; /* length of the target info blob */
- unsigned char *tiblob; /* target info blob in challenge response */
+ struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
bool need_reconnect:1; /* connection reset, uid now invalid */
};
/* no more than one of the following three session flags may be set */
@@ -319,7 +336,8 @@ struct cifsTconInfo {
* "get" on the container.
*/
struct tcon_link {
- unsigned long tl_index;
+ struct rb_node tl_rbnode;
+ uid_t tl_uid;
unsigned long tl_flags;
#define TCON_LINK_MASTER 0
#define TCON_LINK_PENDING 1
@@ -395,16 +413,19 @@ struct cifsFileInfo {
struct list_head llist; /* list of byte range locks we have. */
bool invalidHandle:1; /* file closed via session abend */
bool oplock_break_cancelled:1;
- atomic_t count; /* reference count */
+ int count; /* refcount protected by cifs_file_list_lock */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
};
-/* Take a reference on the file private data */
+/*
+ * Take a reference on the file private data. Must be called with
+ * cifs_file_list_lock held.
+ */
static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
- atomic_inc(&cifs_file->count);
+ ++cifs_file->count;
}
void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
@@ -417,7 +438,6 @@ struct cifsInodeInfo {
struct list_head lockList;
/* BB add in lists for dirty pages i.e. write caching info for oplock */
struct list_head openFileList;
- int write_behind_rc;
__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
unsigned long time; /* jiffies of last update/check of inode */
bool clientCanCacheRead:1; /* read oplock */
@@ -668,7 +688,7 @@ require use of the stronger protocol */
* GlobalMid_Lock protects:
* list operations on pending_mid_q and oplockQ
* updates to XID counters, multiplex id and SMB sequence numbers
- * GlobalSMBSesLock protects:
+ * cifs_file_list_lock protects:
* list operations on tcp and SMB session lists and tCon lists
* f_owner.lock protects certain per file struct operations
* mapping->page_lock protects certain per page operations
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b0f4b5656d4c..de36b09763a8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -131,9 +131,20 @@
#define CIFS_CRYPTO_KEY_SIZE (8)
/*
+ * Size of the ntlm client response
+ */
+#define CIFS_AUTH_RESP_SIZE (24)
+
+/*
* Size of the session key (crypto key encrypted with the password
*/
-#define CIFS_SESS_KEY_SIZE (24)
+#define CIFS_SESS_KEY_SIZE (16)
+
+#define CIFS_CLIENT_CHALLENGE_SIZE (8)
+#define CIFS_SERVER_CHALLENGE_SIZE (8)
+#define CIFS_HMAC_MD5_HASH_SIZE (16)
+#define CIFS_CPHTXT_SIZE (16)
+#define CIFS_NTHASH_SIZE (16)
/*
* Maximum user name length
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e593c40ba7ba..7ed69b6b5fe6 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -104,6 +104,7 @@ extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
extern u64 cifs_UnixTimeToNT(struct timespec);
extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
int offset);
+extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle,
struct file *file, struct tcon_link *tlink,
@@ -362,13 +363,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
__u32 *);
extern int cifs_verify_signature(struct smb_hdr *,
- const struct session_key *session_key,
+ struct TCP_Server_Info *server,
__u32 expected_sequence_number);
-extern int cifs_calculate_session_key(struct session_key *key, const char *rn,
- const char *pass);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
-extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
- const struct nls_table *);
+extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
+extern int setup_ntlm_response(struct cifsSesInfo *);
+extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
+extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
+extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
+extern int calc_seckey(struct cifsSesInfo *);
+
#ifdef CONFIG_CIFS_WEAK_PW_HASH
extern void calc_lanman_hash(const char *password, const char *cryptkey,
bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e98f1f317b15..2f2632b6df5a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -503,7 +503,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
if (rsp->EncryptionKeyLength ==
cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
- memcpy(ses->cryptKey, rsp->EncryptionKey,
+ memcpy(ses->server->cryptkey, rsp->EncryptionKey,
CIFS_CRYPTO_KEY_SIZE);
} else if (server->secMode & SECMODE_PW_ENCRYPT) {
rc = -EIO; /* need cryptkey unless plain text */
@@ -574,7 +574,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
server->timeAdj *= 60;
if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
- memcpy(ses->cryptKey, pSMBr->u.EncryptionKey,
+ memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
CIFS_CRYPTO_KEY_SIZE);
} else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
&& (pSMBr->EncryptionKeyLength == 0)) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7e73176acb58..251a17c03545 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -116,6 +116,7 @@ struct smb_vol {
static int ipv4_connect(struct TCP_Server_Info *server);
static int ipv6_connect(struct TCP_Server_Info *server);
+static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
static void cifs_prune_tlinks(struct work_struct *work);
/*
@@ -175,6 +176,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
}
server->sequence_number = 0;
server->session_estab = false;
+ kfree(server->session_key.response);
+ server->session_key.response = NULL;
+ server->session_key.len = 0;
spin_lock(&GlobalMid_Lock);
list_for_each(tmp, &server->pending_mid_q) {
@@ -1064,7 +1068,7 @@ cifs_parse_mount_options(char *options, const char *devname,
}
i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
value, strlen(value));
- if (i < 0) {
+ if (i == 0) {
printk(KERN_WARNING "CIFS: Could not parse"
" srcaddr: %s\n",
value);
@@ -1560,8 +1564,13 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
+ cifs_crypto_shash_release(server);
cifs_fscache_release_client_cookie(server);
+ kfree(server->session_key.response);
+ server->session_key.response = NULL;
+ server->session_key.len = 0;
+
task = xchg(&server->tsk, NULL);
if (task)
force_sig(SIGKILL, task);
@@ -1614,10 +1623,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
goto out_err;
}
+ rc = cifs_crypto_shash_allocate(tcp_ses);
+ if (rc) {
+ cERROR(1, "could not setup hash structures rc %d", rc);
+ goto out_err;
+ }
+
tcp_ses->hostname = extract_hostname(volume_info->UNC);
if (IS_ERR(tcp_ses->hostname)) {
rc = PTR_ERR(tcp_ses->hostname);
- goto out_err;
+ goto out_err_crypto_release;
}
tcp_ses->noblocksnd = volume_info->noblocksnd;
@@ -1661,7 +1676,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
}
if (rc < 0) {
cERROR(1, "Error connecting to socket. Aborting operation");
- goto out_err;
+ goto out_err_crypto_release;
}
/*
@@ -1675,7 +1690,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
rc = PTR_ERR(tcp_ses->tsk);
cERROR(1, "error %d create cifsd thread", rc);
module_put(THIS_MODULE);
- goto out_err;
+ goto out_err_crypto_release;
}
/* thread spawned, put it on the list */
@@ -1687,6 +1702,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
return tcp_ses;
+out_err_crypto_release:
+ cifs_crypto_shash_release(tcp_ses);
+
out_err:
if (tcp_ses) {
if (!IS_ERR(tcp_ses->hostname))
@@ -1801,8 +1819,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
if (ses == NULL)
goto get_ses_fail;
- ses->tilen = 0;
- ses->tiblob = NULL;
/* new SMB session uses our server ref */
ses->server = server;
if (server->addr.sockAddr6.sin6_family == AF_INET6)
@@ -1823,10 +1839,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
goto get_ses_fail;
}
if (volume_info->domainname) {
- int len = strlen(volume_info->domainname);
- ses->domainName = kmalloc(len + 1, GFP_KERNEL);
- if (ses->domainName)
- strcpy(ses->domainName, volume_info->domainname);
+ ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
+ if (!ses->domainName)
+ goto get_ses_fail;
}
ses->cred_uid = volume_info->cred_uid;
ses->linux_uid = volume_info->linux_uid;
@@ -2886,24 +2901,16 @@ remote_path_check:
goto mount_fail_check;
}
- tlink->tl_index = pSesInfo->linux_uid;
+ tlink->tl_uid = pSesInfo->linux_uid;
tlink->tl_tcon = tcon;
tlink->tl_time = jiffies;
set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
- rc = radix_tree_preload(GFP_KERNEL);
- if (rc == -ENOMEM) {
- kfree(tlink);
- goto mount_fail_check;
- }
-
+ cifs_sb->master_tlink = tlink;
spin_lock(&cifs_sb->tlink_tree_lock);
- radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink);
- radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid,
- CIFS_TLINK_MASTER_TAG);
+ tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
spin_unlock(&cifs_sb->tlink_tree_lock);
- radix_tree_preload_end();
queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
TLINK_IDLE_EXPIRE);
@@ -2985,13 +2992,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
#ifdef CONFIG_CIFS_WEAK_PW_HASH
if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
(ses->server->secType == LANMAN))
- calc_lanman_hash(tcon->password, ses->cryptKey,
+ calc_lanman_hash(tcon->password, ses->server->cryptkey,
ses->server->secMode &
SECMODE_PW_ENCRYPT ? true : false,
bcc_ptr);
else
#endif /* CIFS_WEAK_PW_HASH */
- SMBNTencrypt(tcon->password, ses->cryptKey, bcc_ptr);
+ SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
bcc_ptr += CIFS_SESS_KEY_SIZE;
if (ses->capabilities & CAP_UNICODE) {
@@ -3093,32 +3100,25 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
int
cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
{
- int i, ret;
+ struct rb_root *root = &cifs_sb->tlink_tree;
+ struct rb_node *node;
+ struct tcon_link *tlink;
char *tmp;
- struct tcon_link *tlink[8];
- unsigned long index = 0;
cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
- do {
- spin_lock(&cifs_sb->tlink_tree_lock);
- ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
- (void **)tlink, index,
- ARRAY_SIZE(tlink));
- /* increment index for next pass */
- if (ret > 0)
- index = tlink[ret - 1]->tl_index + 1;
- for (i = 0; i < ret; i++) {
- cifs_get_tlink(tlink[i]);
- clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
- radix_tree_delete(&cifs_sb->tlink_tree,
- tlink[i]->tl_index);
- }
- spin_unlock(&cifs_sb->tlink_tree_lock);
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ while ((node = rb_first(root))) {
+ tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+ cifs_get_tlink(tlink);
+ clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
+ rb_erase(node, root);
- for (i = 0; i < ret; i++)
- cifs_put_tlink(tlink[i]);
- } while (ret != 0);
+ spin_unlock(&cifs_sb->tlink_tree_lock);
+ cifs_put_tlink(tlink);
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ }
+ spin_unlock(&cifs_sb->tlink_tree_lock);
tmp = cifs_sb->prepath;
cifs_sb->prepathlen = 0;
@@ -3178,10 +3178,11 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
} else {
mutex_lock(&ses->server->srv_mutex);
if (!server->session_estab) {
- memcpy(&server->session_key.data,
- &ses->auth_key.data, ses->auth_key.len);
+ server->session_key.response = ses->auth_key.response;
server->session_key.len = ses->auth_key.len;
- ses->server->session_estab = true;
+ server->sequence_number = 0x2;
+ server->session_estab = true;
+ ses->auth_key.response = NULL;
}
mutex_unlock(&server->srv_mutex);
@@ -3192,6 +3193,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
spin_unlock(&GlobalMid_Lock);
}
+ kfree(ses->auth_key.response);
+ ses->auth_key.response = NULL;
+ ses->auth_key.len = 0;
+ kfree(ses->ntlmssp);
+ ses->ntlmssp = NULL;
+
return rc;
}
@@ -3250,22 +3257,10 @@ out:
return tcon;
}
-static struct tcon_link *
+static inline struct tcon_link *
cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
{
- struct tcon_link *tlink;
- unsigned int ret;
-
- spin_lock(&cifs_sb->tlink_tree_lock);
- ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink,
- 0, 1, CIFS_TLINK_MASTER_TAG);
- spin_unlock(&cifs_sb->tlink_tree_lock);
-
- /* the master tcon should always be present */
- if (ret == 0)
- BUG();
-
- return tlink;
+ return cifs_sb->master_tlink;
}
struct cifsTconInfo *
@@ -3281,6 +3276,47 @@ cifs_sb_tcon_pending_wait(void *unused)
return signal_pending(current) ? -ERESTARTSYS : 0;
}
+/*
+ * Find and return the tlink with the given uid, or NULL if the tree holds
+ * no entry for that uid.
+ *
+ * Walks down from the root comparing tl_uid: an entry with a larger tl_uid
+ * sends the walk to the left child, one with a smaller tl_uid sends it to
+ * the right child. This matches the ordering tlink_rb_insert() uses when
+ * it links nodes.
+ *
+ * No lock is taken and no reference is acquired here; the callers in this
+ * file hold cifs_sb->tlink_tree_lock around the call and take their own
+ * reference with cifs_get_tlink().
+ */
+static struct tcon_link *
+tlink_rb_search(struct rb_root *root, uid_t uid)
+{
+ struct rb_node *node = root->rb_node;
+ struct tcon_link *tlink;
+
+ while (node) {
+ tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+
+ if (tlink->tl_uid > uid)
+ node = node->rb_left;
+ else if (tlink->tl_uid < uid)
+ node = node->rb_right;
+ else
+ return tlink;
+ }
+ return NULL; /* walked off the tree: no tlink for this uid */
+}
+
+/*
+ * Insert a tcon_link into the tree, keyed by its tl_uid.
+ *
+ * Descends from the root to an empty child slot, going left when the
+ * existing entry's tl_uid is greater than the new one's and right
+ * otherwise, then links the node there and calls rb_insert_color().
+ *
+ * An equal tl_uid is not rejected; it simply takes the right-hand branch.
+ * It is therefore up to the caller to make sure the uid is not already
+ * present, as cifs_sb_tlink() does by searching under the lock first.
+ * No locking is done here; the callers in this file hold
+ * cifs_sb->tlink_tree_lock.
+ */
+static void
+tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+ struct tcon_link *tlink;
+
+ while (*new) {
+ tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
+ parent = *new; /* remember the node whose child slot we descend into */
+
+ if (tlink->tl_uid > new_tlink->tl_uid)
+ new = &((*new)->rb_left);
+ else
+ new = &((*new)->rb_right);
+ }
+
+ rb_link_node(&new_tlink->tl_rbnode, parent, new);
+ rb_insert_color(&new_tlink->tl_rbnode, root);
+}
+
/*
* Find or construct an appropriate tcon given a cifs_sb and the fsuid of the
* current task.
@@ -3288,7 +3324,7 @@ cifs_sb_tcon_pending_wait(void *unused)
* If the superblock doesn't refer to a multiuser mount, then just return
* the master tcon for the mount.
*
- * First, search the radix tree for an existing tcon for this fsuid. If one
+ * First, search the rbtree for an existing tcon for this fsuid. If one
* exists, then check to see if it's pending construction. If it is then wait
* for construction to complete. Once it's no longer pending, check to see if
* it failed and either return an error or retry construction, depending on
@@ -3301,14 +3337,14 @@ struct tcon_link *
cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
{
int ret;
- unsigned long fsuid = (unsigned long) current_fsuid();
+ uid_t fsuid = current_fsuid();
struct tcon_link *tlink, *newtlink;
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
spin_lock(&cifs_sb->tlink_tree_lock);
- tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+ tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
if (tlink)
cifs_get_tlink(tlink);
spin_unlock(&cifs_sb->tlink_tree_lock);
@@ -3317,36 +3353,24 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL);
if (newtlink == NULL)
return ERR_PTR(-ENOMEM);
- newtlink->tl_index = fsuid;
+ newtlink->tl_uid = fsuid;
newtlink->tl_tcon = ERR_PTR(-EACCES);
set_bit(TCON_LINK_PENDING, &newtlink->tl_flags);
set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags);
cifs_get_tlink(newtlink);
- ret = radix_tree_preload(GFP_KERNEL);
- if (ret != 0) {
- kfree(newtlink);
- return ERR_PTR(ret);
- }
-
spin_lock(&cifs_sb->tlink_tree_lock);
/* was one inserted after previous search? */
- tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+ tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
if (tlink) {
cifs_get_tlink(tlink);
spin_unlock(&cifs_sb->tlink_tree_lock);
- radix_tree_preload_end();
kfree(newtlink);
goto wait_for_construction;
}
- ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink);
- spin_unlock(&cifs_sb->tlink_tree_lock);
- radix_tree_preload_end();
- if (ret) {
- kfree(newtlink);
- return ERR_PTR(ret);
- }
tlink = newtlink;
+ tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
+ spin_unlock(&cifs_sb->tlink_tree_lock);
} else {
wait_for_construction:
ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
@@ -3392,39 +3416,39 @@ cifs_prune_tlinks(struct work_struct *work)
{
struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
prune_tlinks.work);
- struct tcon_link *tlink[8];
- unsigned long now = jiffies;
- unsigned long index = 0;
- int i, ret;
+ struct rb_root *root = &cifs_sb->tlink_tree;
+ struct rb_node *node = rb_first(root);
+ struct rb_node *tmp;
+ struct tcon_link *tlink;
- do {
- spin_lock(&cifs_sb->tlink_tree_lock);
- ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
- (void **)tlink, index,
- ARRAY_SIZE(tlink));
- /* increment index for next pass */
- if (ret > 0)
- index = tlink[ret - 1]->tl_index + 1;
- for (i = 0; i < ret; i++) {
- if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) ||
- atomic_read(&tlink[i]->tl_count) != 0 ||
- time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE,
- now)) {
- tlink[i] = NULL;
- continue;
- }
- cifs_get_tlink(tlink[i]);
- clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
- radix_tree_delete(&cifs_sb->tlink_tree,
- tlink[i]->tl_index);
- }
- spin_unlock(&cifs_sb->tlink_tree_lock);
+ /*
+ * Because we drop the spinlock in the loop in order to put the tlink
+ * it's not guarded against removal of links from the tree. The only
+ * places that remove entries from the tree are this function and
+ * umounts. Because this function is non-reentrant and is canceled
+ * before umount can proceed, this is safe.
+ */
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ node = rb_first(root);
+ while (node != NULL) {
+ tmp = node;
+ node = rb_next(tmp);
+ tlink = rb_entry(tmp, struct tcon_link, tl_rbnode);
+
+ if (test_bit(TCON_LINK_MASTER, &tlink->tl_flags) ||
+ atomic_read(&tlink->tl_count) != 0 ||
+ time_after(tlink->tl_time + TLINK_IDLE_EXPIRE, jiffies))
+ continue;
- for (i = 0; i < ret; i++) {
- if (tlink[i] != NULL)
- cifs_put_tlink(tlink[i]);
- }
- } while (ret != 0);
+ cifs_get_tlink(tlink);
+ clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
+ rb_erase(tmp, root);
+
+ spin_unlock(&cifs_sb->tlink_tree_lock);
+ cifs_put_tlink(tlink);
+ spin_lock(&cifs_sb->tlink_tree_lock);
+ }
+ spin_unlock(&cifs_sb->tlink_tree_lock);
queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
TLINK_IDLE_EXPIRE);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 45af003865d2..06c3e83fa387 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -131,8 +131,7 @@ static inline int cifs_open_inode_helper(struct inode *inode,
/* BB no need to lock inode until after invalidate
since namei code should already have it locked? */
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc != 0)
- pCifsInode->write_behind_rc = rc;
+ mapping_set_error(inode->i_mapping, rc);
}
cFYI(1, "invalidating remote inode since open detected it "
"changed");
@@ -147,12 +146,7 @@ client_can_cache:
rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
xid, NULL);
- if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
- pCifsInode->clientCanCacheAll = true;
- pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p", inode);
- } else if ((oplock & 0xF) == OPLOCK_READ)
- pCifsInode->clientCanCacheRead = true;
+ cifs_set_oplock_level(pCifsInode, oplock);
return rc;
}
@@ -232,6 +226,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
if (pCifsFile == NULL)
return pCifsFile;
+ pCifsFile->count = 1;
pCifsFile->netfid = fileHandle;
pCifsFile->pid = current->tgid;
pCifsFile->uid = current_fsuid();
@@ -242,7 +237,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
mutex_init(&pCifsFile->fh_mutex);
mutex_init(&pCifsFile->lock_mutex);
INIT_LIST_HEAD(&pCifsFile->llist);
- atomic_set(&pCifsFile->count, 1);
INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
spin_lock(&cifs_file_list_lock);
@@ -254,12 +248,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
spin_unlock(&cifs_file_list_lock);
- if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
- pCifsInode->clientCanCacheAll = true;
- pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock inode %p", inode);
- } else if ((oplock & 0xF) == OPLOCK_READ)
- pCifsInode->clientCanCacheRead = true;
+ cifs_set_oplock_level(pCifsInode, oplock);
file->private_data = pCifsFile;
return pCifsFile;
@@ -267,16 +256,18 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
/*
* Release a reference on the file private data. This may involve closing
- * the filehandle out on the server.
+ * the filehandle out on the server. Must be called without holding
+ * cifs_file_list_lock.
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
+ struct inode *inode = cifs_file->dentry->d_inode;
struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
- struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode);
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsLockInfo *li, *tmp;
spin_lock(&cifs_file_list_lock);
- if (!atomic_dec_and_test(&cifs_file->count)) {
+ if (--cifs_file->count > 0) {
spin_unlock(&cifs_file_list_lock);
return;
}
@@ -288,8 +279,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
if (list_empty(&cifsi->openFileList)) {
cFYI(1, "closing last open instance for inode %p",
cifs_file->dentry->d_inode);
- cifsi->clientCanCacheRead = false;
- cifsi->clientCanCacheAll = false;
+ cifs_set_oplock_level(cifsi, 0);
}
spin_unlock(&cifs_file_list_lock);
@@ -605,11 +595,8 @@ reopen_success:
if (can_flush) {
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc != 0)
- CIFS_I(inode)->write_behind_rc = rc;
+ mapping_set_error(inode->i_mapping, rc);
- pCifsInode->clientCanCacheAll = false;
- pCifsInode->clientCanCacheRead = false;
if (tcon->unix_ext)
rc = cifs_get_inode_info_unix(&inode,
full_path, inode->i_sb, xid);
@@ -623,18 +610,9 @@ reopen_success:
invalidate the current end of file on the server
we can not go to the server to get the new inod
info */
- if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
- pCifsInode->clientCanCacheAll = true;
- pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p",
- pCifsFile->dentry->d_inode);
- } else if ((oplock & 0xF) == OPLOCK_READ) {
- pCifsInode->clientCanCacheRead = true;
- pCifsInode->clientCanCacheAll = false;
- } else {
- pCifsInode->clientCanCacheRead = false;
- pCifsInode->clientCanCacheAll = false;
- }
+
+ cifs_set_oplock_level(pCifsInode, oplock);
+
cifs_relock_file(pCifsFile);
reopen_error_exit:
@@ -776,12 +754,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
-
- if (file->private_data == NULL) {
- rc = -EBADF;
- FreeXid(xid);
- return rc;
- }
netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -957,6 +929,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
ssize_t cifs_user_write(struct file *file, const char __user *write_data,
size_t write_size, loff_t *poffset)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
int rc = 0;
unsigned int bytes_written = 0;
unsigned int total_written;
@@ -964,7 +937,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
struct cifsTconInfo *pTcon;
int xid, long_op;
struct cifsFileInfo *open_file;
- struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -1030,21 +1003,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
cifs_stats_bytes_written(pTcon, total_written);
- /* since the write may have blocked check these pointers again */
- if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
- struct inode *inode = file->f_path.dentry->d_inode;
/* Do not update local mtime - server will set its actual value on write
- * inode->i_ctime = inode->i_mtime =
- * current_fs_time(inode->i_sb);*/
- if (total_written > 0) {
- spin_lock(&inode->i_lock);
- if (*poffset > file->f_path.dentry->d_inode->i_size)
- i_size_write(file->f_path.dentry->d_inode,
- *poffset);
- spin_unlock(&inode->i_lock);
- }
- mark_inode_dirty_sync(file->f_path.dentry->d_inode);
+ * inode->i_ctime = inode->i_mtime =
+ * current_fs_time(inode->i_sb);*/
+ if (total_written > 0) {
+ spin_lock(&inode->i_lock);
+ if (*poffset > inode->i_size)
+ i_size_write(inode, *poffset);
+ spin_unlock(&inode->i_lock);
}
+ mark_inode_dirty_sync(inode);
+
FreeXid(xid);
return total_written;
}
@@ -1179,7 +1148,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
bool fsuid_only)
{
struct cifsFileInfo *open_file;
- struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+ struct cifs_sb_info *cifs_sb;
bool any_available = false;
int rc;
@@ -1193,6 +1162,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
return NULL;
}
+ cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
@@ -1353,6 +1324,7 @@ static int cifs_writepages(struct address_space *mapping,
if (!experimEnabled && tcon->ses->server->secMode &
(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
cifsFileInfo_put(open_file);
+ kfree(iov);
return generic_writepages(mapping, wbc);
}
cifsFileInfo_put(open_file);
@@ -1478,12 +1450,7 @@ retry:
if (rc || bytes_written < bytes_to_write) {
cERROR(1, "Write2 ret %d, wrote %d",
rc, bytes_written);
- /* BB what if continued retry is
- requested via mount flags? */
- if (rc == -ENOSPC)
- set_bit(AS_ENOSPC, &mapping->flags);
- else
- set_bit(AS_EIO, &mapping->flags);
+ mapping_set_error(mapping, rc);
} else {
cifs_stats_bytes_written(tcon, bytes_written);
}
@@ -1628,11 +1595,10 @@ int cifs_fsync(struct file *file, int datasync)
rc = filemap_write_and_wait(inode->i_mapping);
if (rc == 0) {
- rc = CIFS_I(inode)->write_behind_rc;
- CIFS_I(inode)->write_behind_rc = 0;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
tcon = tlink_tcon(smbfile->tlink);
- if (!rc && tcon && smbfile &&
- !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
}
@@ -1677,21 +1643,8 @@ int cifs_flush(struct file *file, fl_owner_t id)
struct inode *inode = file->f_path.dentry->d_inode;
int rc = 0;
- /* Rather than do the steps manually:
- lock the inode for writing
- loop through pages looking for write behind data (dirty pages)
- coalesce into contiguous 16K (or smaller) chunks to write to server
- send to server (prefer in parallel)
- deal with writebehind errors
- unlock inode for writing
- filemapfdatawrite appears easier for the time being */
-
- rc = filemap_fdatawrite(inode->i_mapping);
- /* reset wb rc if we were able to write out dirty pages */
- if (!rc) {
- rc = CIFS_I(inode)->write_behind_rc;
- CIFS_I(inode)->write_behind_rc = 0;
- }
+ if (file->f_mode & FMODE_WRITE)
+ rc = filemap_write_and_wait(inode->i_mapping);
cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
@@ -2270,7 +2223,7 @@ void cifs_oplock_break(struct work_struct *work)
oplock_break);
struct inode *inode = cfile->dentry->d_inode;
struct cifsInodeInfo *cinode = CIFS_I(inode);
- int rc, waitrc = 0;
+ int rc = 0;
if (inode && S_ISREG(inode->i_mode)) {
if (cinode->clientCanCacheRead)
@@ -2279,13 +2232,10 @@ void cifs_oplock_break(struct work_struct *work)
break_lease(inode, O_WRONLY);
rc = filemap_fdatawrite(inode->i_mapping);
if (cinode->clientCanCacheRead == 0) {
- waitrc = filemap_fdatawait(inode->i_mapping);
+ rc = filemap_fdatawait(inode->i_mapping);
+ mapping_set_error(inode->i_mapping, rc);
invalidate_remote_inode(inode);
}
- if (!rc)
- rc = waitrc;
- if (rc)
- cinode->write_behind_rc = rc;
cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
}
@@ -2304,7 +2254,7 @@ void cifs_oplock_break(struct work_struct *work)
/*
* We might have kicked in before is_valid_oplock_break()
* finished grabbing reference for us. Make sure it's done by
- * waiting for GlobalSMSSeslock.
+ * waiting for cifs_file_list_lock.
*/
spin_lock(&cifs_file_list_lock);
spin_unlock(&cifs_file_list_lock);
@@ -2312,6 +2262,7 @@ void cifs_oplock_break(struct work_struct *work)
cifs_oplock_break_put(cfile);
}
+/* must be called while holding cifs_file_list_lock */
void cifs_oplock_break_get(struct cifsFileInfo *cfile)
{
cifs_sb_active(cfile->dentry->d_sb);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 94979309698a..ef3a55bf86b6 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1682,8 +1682,7 @@ cifs_invalidate_mapping(struct inode *inode)
/* write back any cached data */
if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc)
- cifs_i->write_behind_rc = rc;
+ mapping_set_error(inode->i_mapping, rc);
}
invalidate_remote_inode(inode);
cifs_fscache_reset_inode_cookie(inode);
@@ -1943,10 +1942,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc != 0) {
- cifsInode->write_behind_rc = rc;
- rc = 0;
- }
+ mapping_set_error(inode->i_mapping, rc);
+ rc = 0;
if (attrs->ia_valid & ATTR_SIZE) {
rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2087,10 +2084,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
- if (rc != 0) {
- cifsInode->write_behind_rc = rc;
- rc = 0;
- }
+ mapping_set_error(inode->i_mapping, rc);
+ rc = 0;
if (attrs->ia_valid & ATTR_SIZE) {
rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2182,7 +2177,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
setattr_copy(inode, attrs);
mark_inode_dirty(inode);
- return 0;
cifs_setattr_exit:
kfree(full_path);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 077bf756f342..0c98672d0122 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,10 +38,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
struct cifs_sb_info *cifs_sb;
#ifdef CONFIG_CIFS_POSIX
struct cifsFileInfo *pSMBFile = filep->private_data;
- struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink);
+ struct cifsTconInfo *tcon;
__u64 ExtAttrBits = 0;
__u64 ExtAttrMask = 0;
- __u64 caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
+ __u64 caps;
#endif /* CONFIG_CIFS_POSIX */
xid = GetXid();
@@ -62,9 +62,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
break;
#ifdef CONFIG_CIFS_POSIX
case FS_IOC_GETFLAGS:
+ if (pSMBFile == NULL)
+ break;
+ tcon = tlink_tcon(pSMBFile->tlink);
+ caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
if (CIFS_UNIX_EXTATTR_CAP & caps) {
- if (pSMBFile == NULL)
- break;
rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
&ExtAttrBits, &ExtAttrMask);
if (rc == 0)
@@ -75,13 +77,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
break;
case FS_IOC_SETFLAGS:
+ if (pSMBFile == NULL)
+ break;
+ tcon = tlink_tcon(pSMBFile->tlink);
+ caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
if (CIFS_UNIX_EXTATTR_CAP & caps) {
if (get_user(ExtAttrBits, (int __user *)arg)) {
rc = -EFAULT;
break;
}
- if (pSMBFile == NULL)
- break;
/* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
extAttrBits, &ExtAttrMask);*/
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1c681f6a6803..43f10281bc19 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,15 +569,14 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
cFYI(1, "file id match, oplock break");
pCifsInode = CIFS_I(netfile->dentry->d_inode);
- pCifsInode->clientCanCacheAll = false;
- if (pSMB->OplockLevel == 0)
- pCifsInode->clientCanCacheRead = false;
+ cifs_set_oplock_level(pCifsInode,
+ pSMB->OplockLevel);
/*
* cifs_oplock_break_put() can't be called
* from here. Get reference after queueing
* succeeded. cifs_oplock_break() will
- * synchronize using GlobalSMSSeslock.
+ * synchronize using cifs_file_list_lock.
*/
if (queue_work(system_nrt_wq,
&netfile->oplock_break))
@@ -722,3 +721,23 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
cifs_sb_master_tcon(cifs_sb)->treeName);
}
}
+/*
+ * Set the caching flags on @cinode from an oplock level.
+ *
+ * Only the low four bits of @oplock are examined:
+ *   OPLOCK_EXCLUSIVE - clientCanCacheAll and clientCanCacheRead both true
+ *   OPLOCK_READ      - clientCanCacheRead true, clientCanCacheAll false
+ *   anything else    - both flags false
+ *
+ * Both flags are written on every call, so this can lower the caching
+ * level as well as raise it. No locking is done here.
+ */
+void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
+{
+ oplock &= 0xF; /* compare only the low four bits */
+
+ if (oplock == OPLOCK_EXCLUSIVE) {
+ cinode->clientCanCacheAll = true;
+ cinode->clientCanCacheRead = true;
+ cFYI(1, "Exclusive Oplock granted on inode %p",
+ &cinode->vfs_inode);
+ } else if (oplock == OPLOCK_READ) {
+ cinode->clientCanCacheAll = false;
+ cinode->clientCanCacheRead = true;
+ cFYI(1, "Level II Oplock granted on inode %p",
+ &cinode->vfs_inode);
+ } else {
+ cinode->clientCanCacheAll = false;
+ cinode->clientCanCacheRead = false;
+ }
+}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2a11efd96592..7b01d3f6eed6 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,9 +32,6 @@
#include <linux/slab.h>
#include "cifs_spnego.h"
-extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
- unsigned char *p24);
-
/*
* Checks if this is the first smb session to be reconnected after
* the socket has been reestablished (so we know whether to use vc 0).
@@ -402,23 +399,22 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
return -EINVAL;
}
- memcpy(ses->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
+ memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
/* BB we could decode pblob->NegotiateFlags; some may be useful */
/* In particular we can examine sign flags */
/* BB spec says that if AvId field of MsvAvTimestamp is populated then
we must set the MIC field of the AUTHENTICATE_MESSAGE */
-
+ ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
- ses->tilen = tilen;
- if (ses->tilen) {
- ses->tiblob = kmalloc(tilen, GFP_KERNEL);
- if (!ses->tiblob) {
+ if (tilen) {
+ ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
+ if (!ses->auth_key.response) {
cERROR(1, "Challenge target info allocation failure");
- ses->tilen = 0;
return -ENOMEM;
}
- memcpy(ses->tiblob, bcc_ptr + tioffset, ses->tilen);
+ memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen);
+ ses->auth_key.len = tilen;
}
return 0;
@@ -443,10 +439,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
NTLMSSP_NEGOTIATE_NTLM;
if (ses->server->secMode &
- (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+ (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
flags |= NTLMSSP_NEGOTIATE_SIGN;
- if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
- flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+ if (!ses->server->session_estab)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
+ NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+ }
sec_blob->NegotiateFlags |= cpu_to_le32(flags);
@@ -469,11 +467,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
const struct nls_table *nls_cp)
{
int rc;
- unsigned int size;
AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
__u32 flags;
unsigned char *tmp;
- struct ntlmv2_resp ntlmv2_response = {};
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
@@ -497,25 +493,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->LmChallengeResponse.MaximumLength = 0;
sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
- rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp);
+ rc = setup_ntlmv2_rsp(ses, nls_cp);
if (rc) {
cERROR(1, "Error %d during NTLMSSP authentication", rc);
goto setup_ntlmv2_ret;
}
- size = sizeof(struct ntlmv2_resp);
- memcpy(tmp, (char *)&ntlmv2_response, size);
- tmp += size;
- if (ses->tilen > 0) {
- memcpy(tmp, ses->tiblob, ses->tilen);
- tmp += ses->tilen;
- }
+ memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
- sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + ses->tilen);
+ sec_blob->NtChallengeResponse.Length =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
sec_blob->NtChallengeResponse.MaximumLength =
- cpu_to_le16(size + ses->tilen);
- kfree(ses->tiblob);
- ses->tiblob = NULL;
- ses->tilen = 0;
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
if (ses->domainName == NULL) {
sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -554,9 +544,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->WorkstationName.MaximumLength = 0;
tmp += 2;
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
- sec_blob->SessionKey.Length = 0;
- sec_blob->SessionKey.MaximumLength = 0;
+ if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
+ !calc_seckey(ses)) {
+ memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
+ sec_blob->SessionKey.MaximumLength =
+ cpu_to_le16(CIFS_CPHTXT_SIZE);
+ tmp += CIFS_CPHTXT_SIZE;
+ } else {
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.Length = 0;
+ sec_blob->SessionKey.MaximumLength = 0;
+ }
setup_ntlmv2_ret:
*buflen = tmp - pbuffer;
@@ -600,8 +600,16 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
return -EINVAL;
type = ses->server->secType;
-
cFYI(1, "sess setup type %d", type);
+ if (type == RawNTLMSSP) {
+ /* if memory allocation is successful, caller of this function
+ * frees it.
+ */
+ ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
+ if (!ses->ntlmssp)
+ return -ENOMEM;
+ }
+
ssetup_ntlmssp_authenticate:
if (phase == NtLmChallenge)
phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -666,10 +674,14 @@ ssetup_ntlmssp_authenticate:
/* no capabilities flags in old lanman negotiation */
pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
- /* BB calculate hash with password */
- /* and copy into bcc */
- calc_lanman_hash(ses->password, ses->cryptKey,
+ /* Calculate hash with password and copy into bcc_ptr.
+ * Encryption Key (stored as in cryptkey) gets used if the
+ * security mode bit in Negottiate Protocol response states
+ * to use challenge/response method (i.e. Password bit is 1).
+ */
+
+ calc_lanman_hash(ses->password, ses->server->cryptkey,
ses->server->secMode & SECMODE_PW_ENCRYPT ?
true : false, lnm_session_key);
@@ -687,24 +699,27 @@ ssetup_ntlmssp_authenticate:
ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
#endif
} else if (type == NTLM) {
- char ntlm_session_key[CIFS_SESS_KEY_SIZE];
-
pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
pSMB->req_no_secext.CaseInsensitivePasswordLength =
- cpu_to_le16(CIFS_SESS_KEY_SIZE);
+ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(CIFS_SESS_KEY_SIZE);
+ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+
+ /* calculate ntlm response and session key */
+ rc = setup_ntlm_response(ses);
+ if (rc) {
+ cERROR(1, "Error %d during NTLM authentication", rc);
+ goto ssetup_exit;
+ }
- /* calculate session key */
- SMBNTencrypt(ses->password, ses->cryptKey, ntlm_session_key);
+ /* copy ntlm response */
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
- cifs_calculate_session_key(&ses->auth_key,
- ntlm_session_key, ses->password);
- /* copy session key */
- memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
- bcc_ptr += CIFS_SESS_KEY_SIZE;
- memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
- bcc_ptr += CIFS_SESS_KEY_SIZE;
if (ses->capabilities & CAP_UNICODE) {
/* unicode strings must be word aligned */
if (iov[0].iov_len % 2) {
@@ -715,47 +730,26 @@ ssetup_ntlmssp_authenticate:
} else
ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
} else if (type == NTLMv2) {
- char *v2_sess_key =
- kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
-
- /* BB FIXME change all users of v2_sess_key to
- struct ntlmv2_resp */
-
- if (v2_sess_key == NULL) {
- rc = -ENOMEM;
- goto ssetup_exit;
- }
-
pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
/* LM2 password would be here if we supported it */
pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
- /* cpu_to_le16(LM2_SESS_KEY_SIZE); */
- /* calculate session key */
- rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
+ /* calculate nlmv2 response and session key */
+ rc = setup_ntlmv2_rsp(ses, nls_cp);
if (rc) {
cERROR(1, "Error %d during NTLMv2 authentication", rc);
- kfree(v2_sess_key);
goto ssetup_exit;
}
- memcpy(bcc_ptr, (char *)v2_sess_key,
- sizeof(struct ntlmv2_resp));
- bcc_ptr += sizeof(struct ntlmv2_resp);
- kfree(v2_sess_key);
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+
/* set case sensitive password length after tilen may get
* assigned, tilen is 0 otherwise.
*/
pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(sizeof(struct ntlmv2_resp) + ses->tilen);
- if (ses->tilen > 0) {
- memcpy(bcc_ptr, ses->tiblob, ses->tilen);
- bcc_ptr += ses->tilen;
- /* we never did allocate ses->domainName to free */
- kfree(ses->tiblob);
- ses->tiblob = NULL;
- ses->tilen = 0;
- }
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
if (ses->capabilities & CAP_UNICODE) {
if (iov[0].iov_len % 2) {
@@ -768,6 +762,7 @@ ssetup_ntlmssp_authenticate:
} else if (type == Kerberos) {
#ifdef CONFIG_CIFS_UPCALL
struct cifs_spnego_msg *msg;
+
spnego_key = cifs_get_spnego_key(ses);
if (IS_ERR(spnego_key)) {
rc = PTR_ERR(spnego_key);
@@ -785,16 +780,17 @@ ssetup_ntlmssp_authenticate:
rc = -EKEYREJECTED;
goto ssetup_exit;
}
- /* bail out if key is too long */
- if (msg->sesskey_len >
- sizeof(ses->auth_key.data.krb5)) {
- cERROR(1, "Kerberos signing key too long (%u bytes)",
- msg->sesskey_len);
- rc = -EOVERFLOW;
+
+ ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cERROR(1, "Kerberos can't allocate (%u bytes) memory",
+ msg->sesskey_len);
+ rc = -ENOMEM;
goto ssetup_exit;
}
+ memcpy(ses->auth_key.response, msg->data, msg->sesskey_len);
ses->auth_key.len = msg->sesskey_len;
- memcpy(ses->auth_key.data.krb5, msg->data, msg->sesskey_len);
+
pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
capabilities |= CAP_EXTENDED_SECURITY;
pSMB->req.Capabilities = cpu_to_le32(capabilities);
@@ -897,8 +893,6 @@ ssetup_ntlmssp_authenticate:
CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
/* SMB request buf freed in SendReceive2 */
- cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
-
pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
smb_buf = (struct smb_hdr *)iov[0].iov_base;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a66c91eb6eb4..e0588cdf4cc5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(midQ->resp_buf,
- &ses->server->session_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(out_buf,
- &ses->server->session_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
(ses->server->secMode & (SECMODE_SIGN_REQUIRED |
SECMODE_SIGN_ENABLED))) {
rc = cifs_verify_signature(out_buf,
- &ses->server->session_key,
+ ses->server,
midQ->sequence_number+1);
if (rc) {
cERROR(1, "Unexpected SMB signature");
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 7993b96ca348..5ea57c8c7f97 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -306,16 +306,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
/* init_coda: used by filesystems.c to register coda */
-static int coda_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *coda_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, coda_fill_super);
}
struct file_system_type coda_fs_type = {
.owner = THIS_MODULE,
.name = "coda",
- .get_sb = coda_get_sb,
+ .mount = coda_mount,
.kill_sb = kill_anon_super,
.fs_flags = FS_BINARY_MOUNTDATA,
};
diff --git a/fs/compat.c b/fs/compat.c
index f03abdadc401..c580c322fa6b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -29,8 +29,6 @@
#include <linux/vfs.h>
#include <linux/ioctl.h>
#include <linux/init.h>
-#include <linux/smb.h>
-#include <linux/smb_mount.h>
#include <linux/ncp_mount.h>
#include <linux/nfs4_mount.h>
#include <linux/syscalls.h>
@@ -51,6 +49,7 @@
#include <linux/eventpoll.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
+#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -608,14 +607,14 @@ ssize_t compat_rw_copy_check_uvector(int type,
/*
* Single unix specification:
* We should -EINVAL if an element length is not >= 0 and fitting an
- * ssize_t. The total length is fitting an ssize_t
+ * ssize_t.
*
- * Be careful here because iov_len is a size_t not an ssize_t
+ * In Linux, the total length is limited to MAX_RW_COUNT, there is
+ * no overflow possibility.
*/
tot_len = 0;
ret = -EINVAL;
for (seg = 0; seg < nr_segs; seg++) {
- compat_ssize_t tmp = tot_len;
compat_uptr_t buf;
compat_ssize_t len;
@@ -626,13 +625,13 @@ ssize_t compat_rw_copy_check_uvector(int type,
}
if (len < 0) /* size_t not fitting in compat_ssize_t .. */
goto out;
- tot_len += len;
- if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
- goto out;
if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
+ if (len > MAX_RW_COUNT - tot_len)
+ len = MAX_RW_COUNT - tot_len;
+ tot_len += len;
iov->iov_base = compat_ptr(buf);
iov->iov_len = (compat_size_t) len;
uvector++;
@@ -745,30 +744,6 @@ static void *do_ncp_super_data_conv(void *raw_data)
return raw_data;
}
-struct compat_smb_mount_data {
- compat_int_t version;
- __compat_uid_t mounted_uid;
- __compat_uid_t uid;
- __compat_gid_t gid;
- compat_mode_t file_mode;
- compat_mode_t dir_mode;
-};
-
-static void *do_smb_super_data_conv(void *raw_data)
-{
- struct smb_mount_data *s = raw_data;
- struct compat_smb_mount_data *c_s = raw_data;
-
- if (c_s->version != SMB_MOUNT_OLDVERSION)
- goto out;
- s->dir_mode = c_s->dir_mode;
- s->file_mode = c_s->file_mode;
- s->gid = c_s->gid;
- s->uid = c_s->uid;
- s->mounted_uid = c_s->mounted_uid;
- out:
- return raw_data;
-}
struct compat_nfs_string {
compat_uint_t len;
@@ -835,7 +810,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
return 0;
}
-#define SMBFS_NAME "smbfs"
#define NCPFS_NAME "ncpfs"
#define NFS4_NAME "nfs4"
@@ -870,9 +844,7 @@ asmlinkage long compat_sys_mount(const char __user * dev_name,
retval = -EINVAL;
if (kernel_type && data_page) {
- if (!strcmp(kernel_type, SMBFS_NAME)) {
- do_smb_super_data_conv((void *)data_page);
- } else if (!strcmp(kernel_type, NCPFS_NAME)) {
+ if (!strcmp(kernel_type, NCPFS_NAME)) {
do_ncp_super_data_conv((void *)data_page);
} else if (!strcmp(kernel_type, NFS4_NAME)) {
if (do_nfs4_super_data_conv((void *) data_page))
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d0ad09d57789..410ed188faa1 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -46,7 +46,6 @@
#include <linux/videodev.h>
#include <linux/netdevice.h>
#include <linux/raw.h>
-#include <linux/smb_fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/rtc.h>
@@ -558,25 +557,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
#endif /* CONFIG_BLOCK */
-static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
- compat_uid_t __user *argp)
-{
- mm_segment_t old_fs = get_fs();
- __kernel_uid_t kuid;
- int err;
-
- cmd = SMB_IOC_GETMOUNTUID;
-
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long)&kuid);
- set_fs(old_fs);
-
- if (err >= 0)
- err = put_user(kuid, argp);
-
- return err;
-}
-
/* Bluetooth ioctls */
#define HCIUARTSETPROTO _IOW('U', 200, int)
#define HCIUARTGETPROTO _IOR('U', 201, int)
@@ -1199,8 +1179,9 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
COMPATIBLE_IOCTL(OSS_GETVERSION)
-/* SMB ioctls which do not need any translations */
-COMPATIBLE_IOCTL(SMB_IOC_NEWCONN)
+/* Raw devices */
+COMPATIBLE_IOCTL(RAW_SETBIND)
+COMPATIBLE_IOCTL(RAW_GETBIND)
/* Watchdog */
COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
@@ -1458,10 +1439,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
case MTIOCPOS32:
return mt_ioctl_trans(fd, cmd, argp);
#endif
- /* One SMB ioctl needs translations. */
-#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
- case SMB_IOC_GETMOUNTUID_32:
- return do_smb_getmountuid(fd, cmd, argp);
/* Serial */
case TIOCGSERIAL:
case TIOCSSERIAL:
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 8c8d64230c2d..7d3607febe1c 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
-static int configfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *configfs_do_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt);
+ return mount_single(fs_type, flags, data, configfs_fill_super);
}
static struct file_system_type configfs_fs_type = {
.owner = THIS_MODULE,
.name = "configfs",
- .get_sb = configfs_get_sb,
+ .mount = configfs_do_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 1e7a33028d33..32fd5fe9ca0e 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -533,17 +533,16 @@ static const struct super_operations cramfs_ops = {
.statfs = cramfs_statfs,
};
-static int cramfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *cramfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
}
static struct file_system_type cramfs_fs_type = {
.owner = THIS_MODULE,
.name = "cramfs",
- .get_sb = cramfs_get_sb,
+ .mount = cramfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a4ed8380e98a..37a8ca7c1222 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -135,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
}
-static int debug_get_sb(struct file_system_type *fs_type,
+static struct dentry *debug_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_single(fs_type, flags, data, debug_fill_super, mnt);
+ return mount_single(fs_type, flags, data, debug_fill_super);
}
static struct file_system_type debug_fs_type = {
.owner = THIS_MODULE,
.name = "debugfs",
- .get_sb = debug_get_sb,
+ .mount = debug_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8b3ffd5b5235..1bb547c9cad6 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
}
/*
- * devpts_get_sb()
+ * devpts_mount()
*
* If the '-o newinstance' mount option was specified, mount a new
* (private) instance of devpts. PTYs created in this instance are
@@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
* semantics in devpts while preserving backward compatibility of the
* current 'single-namespace' semantics. i.e all mounts of devpts
* without the 'newinstance' mount option should bind to the initial
- * kernel mount, like get_sb_single().
+ * kernel mount, like mount_single().
*
* Mounts with 'newinstance' option create a new, private namespace.
*
* NOTE:
*
- * For single-mount semantics, devpts cannot use get_sb_single(),
- * because get_sb_single()/sget() find and use the super-block from
+ * For single-mount semantics, devpts cannot use mount_single(),
+ * because mount_single()/sget() find and use the super-block from
* the most recent mount of devpts. But that recent mount may be a
- * 'newinstance' mount and get_sb_single() would pick the newinstance
+ * 'newinstance' mount and mount_single() would pick the newinstance
* super-block instead of the initial super-block.
*/
-static int devpts_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *devpts_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
int error;
struct pts_mount_opts opts;
@@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
error = parse_mount_options(data, PARSE_MOUNT, &opts);
if (error)
- return error;
+ return ERR_PTR(error);
if (opts.newinstance)
s = sget(fs_type, NULL, set_anon_super, NULL);
@@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
if (IS_ERR(s))
- return PTR_ERR(s);
+ return ERR_CAST(s);
if (!s->s_root) {
s->s_flags = flags;
@@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type,
if (error)
goto out_undo_sget;
- simple_set_mnt(mnt, s);
-
- return 0;
+ return dget(s->s_root);
out_undo_sget:
deactivate_locked_super(s);
- return error;
+ return ERR_PTR(error);
}
#else
@@ -404,10 +402,10 @@ out_undo_sget:
* This supports only the legacy single-instance semantics (no
* multiple-instance semantics)
*/
-static int devpts_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
+ return mount_single(fs_type, flags, data, devpts_fill_super);
}
#endif
@@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb)
static struct file_system_type devpts_fs_type = {
.name = "devpts",
- .get_sb = devpts_get_sb,
+ .mount = devpts_mount,
.kill_sb = devpts_kill_sb,
};
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 40186b959429..413a3c48f0bb 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -377,6 +377,7 @@ struct ecryptfs_mount_crypt_stat {
#define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES 0x00000010
#define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK 0x00000020
#define ECRYPTFS_GLOBAL_ENCFN_USE_FEK 0x00000040
+#define ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY 0x00000080
u32 flags;
struct list_head global_auth_tok_list;
struct mutex global_auth_tok_list_mutex;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3fbc94203380..9d1a22d62765 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -32,6 +32,7 @@
#include <linux/crypto.h>
#include <linux/fs_stack.h>
#include <linux/slab.h>
+#include <linux/xattr.h>
#include <asm/unaligned.h>
#include "ecryptfs_kernel.h"
@@ -70,15 +71,19 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
struct dentry *dentry_save;
struct vfsmount *vfsmount_save;
+ unsigned int flags_save;
int rc;
dentry_save = nd->path.dentry;
vfsmount_save = nd->path.mnt;
+ flags_save = nd->flags;
nd->path.dentry = lower_dentry;
nd->path.mnt = lower_mnt;
+ nd->flags &= ~LOOKUP_OPEN;
rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
nd->path.dentry = dentry_save;
nd->path.mnt = vfsmount_save;
+ nd->flags = flags_save;
return rc;
}
@@ -1108,10 +1113,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&lower_dentry->d_inode->i_mutex);
- rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
- size, flags);
- mutex_unlock(&lower_dentry->d_inode->i_mutex);
+
+ rc = vfs_setxattr(lower_dentry, name, value, size, flags);
out:
return rc;
}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 73811cfa2ea4..b1f6858a5223 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -446,6 +446,7 @@ out:
*/
static int
ecryptfs_find_auth_tok_for_sig(
+ struct key **auth_tok_key,
struct ecryptfs_auth_tok **auth_tok,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
char *sig)
@@ -453,12 +454,21 @@ ecryptfs_find_auth_tok_for_sig(
struct ecryptfs_global_auth_tok *global_auth_tok;
int rc = 0;
+ (*auth_tok_key) = NULL;
(*auth_tok) = NULL;
if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
mount_crypt_stat, sig)) {
- struct key *auth_tok_key;
- rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
+ /* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the
+ * mount_crypt_stat structure, we prevent to use auth toks that
+ * are not inserted through the ecryptfs_add_global_auth_tok
+ * function.
+ */
+ if (mount_crypt_stat->flags
+ & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
+ return -EINVAL;
+
+ rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
sig);
} else
(*auth_tok) = global_auth_tok->global_auth_tok;
@@ -509,6 +519,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
char *filename, size_t filename_size)
{
struct ecryptfs_write_tag_70_packet_silly_stack *s;
+ struct key *auth_tok_key = NULL;
int rc = 0;
s = kmalloc(sizeof(*s), GFP_KERNEL);
@@ -606,6 +617,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
}
dest[s->i++] = s->cipher_code;
rc = ecryptfs_find_auth_tok_for_sig(
+ &auth_tok_key,
&s->auth_tok, mount_crypt_stat,
mount_crypt_stat->global_default_fnek_sig);
if (rc) {
@@ -753,6 +765,8 @@ out_free_unlock:
out_unlock:
mutex_unlock(s->tfm_mutex);
out:
+ if (auth_tok_key)
+ key_put(auth_tok_key);
kfree(s);
return rc;
}
@@ -798,6 +812,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
char *data, size_t max_packet_size)
{
struct ecryptfs_parse_tag_70_packet_silly_stack *s;
+ struct key *auth_tok_key = NULL;
int rc = 0;
(*packet_size) = 0;
@@ -910,7 +925,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
* >= ECRYPTFS_MAX_IV_BYTES. */
memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
s->desc.info = s->iv;
- rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat,
+ rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
+ &s->auth_tok, mount_crypt_stat,
s->fnek_sig_hex);
if (rc) {
printk(KERN_ERR "%s: Error attempting to find auth tok for "
@@ -986,6 +1002,8 @@ out:
(*filename_size) = 0;
(*filename) = NULL;
}
+ if (auth_tok_key)
+ key_put(auth_tok_key);
kfree(s);
return rc;
}
@@ -1557,14 +1575,19 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
ECRYPTFS_VERSION_MAJOR,
ECRYPTFS_VERSION_MINOR);
rc = -EINVAL;
- goto out;
+ goto out_release_key;
}
if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
&& (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
printk(KERN_ERR "Invalid auth_tok structure "
"returned from key query\n");
rc = -EINVAL;
- goto out;
+ goto out_release_key;
+ }
+out_release_key:
+ if (rc) {
+ key_put(*auth_tok_key);
+ (*auth_tok_key) = NULL;
}
out:
return rc;
@@ -1688,6 +1711,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
size_t tag_11_contents_size;
size_t tag_11_packet_size;
+ struct key *auth_tok_key = NULL;
int rc = 0;
INIT_LIST_HEAD(&auth_tok_list);
@@ -1784,6 +1808,10 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
* just one will be sufficient to decrypt to get the FEK. */
find_next_matching_auth_tok:
found_auth_tok = 0;
+ if (auth_tok_key) {
+ key_put(auth_tok_key);
+ auth_tok_key = NULL;
+ }
list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
candidate_auth_tok = &auth_tok_list_item->auth_tok;
if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1800,10 +1828,11 @@ find_next_matching_auth_tok:
rc = -EINVAL;
goto out_wipe_list;
}
- ecryptfs_find_auth_tok_for_sig(&matching_auth_tok,
+ rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
+ &matching_auth_tok,
crypt_stat->mount_crypt_stat,
candidate_auth_tok_sig);
- if (matching_auth_tok) {
+ if (!rc) {
found_auth_tok = 1;
goto found_matching_auth_tok;
}
@@ -1866,6 +1895,8 @@ found_matching_auth_tok:
out_wipe_list:
wipe_auth_tok_list(&auth_tok_list);
out:
+ if (auth_tok_key)
+ key_put(auth_tok_key);
return rc;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index cbd4e18adb20..a9dbd62518e6 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -208,7 +208,8 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
- ecryptfs_opt_unlink_sigs, ecryptfs_opt_err };
+ ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
+ ecryptfs_opt_err };
static const match_table_t tokens = {
{ecryptfs_opt_sig, "sig=%s"},
@@ -223,6 +224,7 @@ static const match_table_t tokens = {
{ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
{ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
{ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
+ {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
{ecryptfs_opt_err, NULL}
};
@@ -406,6 +408,10 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
case ecryptfs_opt_unlink_sigs:
mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
break;
+ case ecryptfs_opt_mount_auth_tok_only:
+ mount_crypt_stat->flags |=
+ ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
+ break;
case ecryptfs_opt_err:
default:
printk(KERN_WARNING
@@ -540,9 +546,8 @@ out:
* ecryptfs_interpose to perform most of the linking
* ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
*/
-static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct super_block *s;
struct ecryptfs_sb_info *sbi;
@@ -607,8 +612,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
err = "Reading sb failed";
goto out;
}
- simple_set_mnt(mnt, s);
- return 0;
+ return dget(s->s_root);
out:
if (sbi) {
@@ -616,7 +620,7 @@ out:
kmem_cache_free(ecryptfs_sb_info_cache, sbi);
}
printk(KERN_ERR "%s; rc = [%d]\n", err, rc);
- return rc;
+ return ERR_PTR(rc);
}
/**
@@ -639,7 +643,7 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
static struct file_system_type ecryptfs_fs_type = {
.owner = THIS_MODULE,
.name = "ecryptfs",
- .get_sb = ecryptfs_get_sb,
+ .mount = ecryptfs_mount,
.kill_sb = ecryptfs_kill_block_super,
.fs_flags = 0
};
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index f7fc286a3aa9..253732382d37 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -180,6 +180,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
seq_printf(m, ",ecryptfs_encrypted_view");
if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
seq_printf(m, ",ecryptfs_unlink_sigs");
+ if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
+ seq_printf(m, ",ecryptfs_mount_auth_tok_only");
return 0;
}
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f04942810818..5073a07652cc 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -20,16 +20,16 @@
static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int efs_fill_super(struct super_block *s, void *d, int silent);
-static int efs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *efs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super);
}
static struct file_system_type efs_fs_type = {
.owner = THIS_MODULE,
.name = "efs",
- .get_sb = efs_get_sb,
+ .mount = efs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 256bb7bb102a..8cf07242067d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -77,9 +77,6 @@
/* Maximum number of nesting allowed inside epoll sets */
#define EP_MAX_NESTS 4
-/* Maximum msec timeout value storeable in a long int */
-#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
-
#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
#define EP_UNACTIVE_PTR ((void *) -1L)
@@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep,
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int maxevents, long timeout)
{
- int res, eavail;
+ int res, eavail, timed_out = 0;
unsigned long flags;
- long jtimeout;
+ long slack;
wait_queue_t wait;
-
- /*
- * Calculate the timeout by checking for the "infinite" value (-1)
- * and the overflow condition. The passed timeout is in milliseconds,
- * that why (t * HZ) / 1000.
- */
- jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
- MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
+ struct timespec end_time;
+ ktime_t expires, *to = NULL;
+
+ if (timeout > 0) {
+ ktime_get_ts(&end_time);
+ timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
+ slack = select_estimate_accuracy(&end_time);
+ to = &expires;
+ *to = timespec_to_ktime(end_time);
+ } else if (timeout == 0) {
+ timed_out = 1;
+ }
retry:
spin_lock_irqsave(&ep->lock, flags);
@@ -1150,7 +1151,7 @@ retry:
* to TASK_INTERRUPTIBLE before doing the checks.
*/
set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&ep->rdllist) || !jtimeout)
+ if (!list_empty(&ep->rdllist) || timed_out)
break;
if (signal_pending(current)) {
res = -EINTR;
@@ -1158,7 +1159,9 @@ retry:
}
spin_unlock_irqrestore(&ep->lock, flags);
- jtimeout = schedule_timeout(jtimeout);
+ if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+ timed_out = 1;
+
spin_lock_irqsave(&ep->lock, flags);
}
__remove_wait_queue(&ep->wq, &wait);
@@ -1176,7 +1179,7 @@ retry:
* more luck.
*/
if (!res && eavail &&
- !(res = ep_send_events(ep, events, maxevents)) && jtimeout)
+ !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto retry;
return res;
diff --git a/fs/exec.c b/fs/exec.c
index 3aa75b8888a1..99d33a1371e9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,6 +66,12 @@ char core_pattern[CORENAME_MAX_SIZE] = "core";
unsigned int core_pipe_limit;
int suid_dumpable = 0;
+struct core_name {
+ char *corename;
+ int used, size;
+};
+static atomic_t call_count = ATOMIC_INIT(1);
+
/* The maximal length of core_pattern is also specified in sysctl.c */
static LIST_HEAD(formats);
@@ -1003,7 +1009,7 @@ int flush_old_exec(struct linux_binprm * bprm)
bprm->mm = NULL; /* We're using it now */
- current->flags &= ~PF_RANDOMIZE;
+ current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
flush_thread();
current->personality &= ~bprm->per_clear;
@@ -1083,14 +1089,14 @@ EXPORT_SYMBOL(setup_new_exec);
*/
int prepare_bprm_creds(struct linux_binprm *bprm)
{
- if (mutex_lock_interruptible(&current->cred_guard_mutex))
+ if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
return -ERESTARTNOINTR;
bprm->cred = prepare_exec_creds();
if (likely(bprm->cred))
return 0;
- mutex_unlock(&current->cred_guard_mutex);
+ mutex_unlock(&current->signal->cred_guard_mutex);
return -ENOMEM;
}
@@ -1098,7 +1104,7 @@ void free_bprm(struct linux_binprm *bprm)
{
free_arg_pages(bprm);
if (bprm->cred) {
- mutex_unlock(&current->cred_guard_mutex);
+ mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
kfree(bprm);
@@ -1119,13 +1125,13 @@ void install_exec_creds(struct linux_binprm *bprm)
* credentials; any time after this it may be unlocked.
*/
security_bprm_committed_creds(bprm);
- mutex_unlock(&current->cred_guard_mutex);
+ mutex_unlock(&current->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(install_exec_creds);
/*
* determine how safe it is to execute the proposed program
- * - the caller must hold current->cred_guard_mutex to protect against
+ * - the caller must hold ->cred_guard_mutex to protect against
* PTRACE_ATTACH
*/
int check_unsafe_exec(struct linux_binprm *bprm)
@@ -1406,7 +1412,6 @@ int do_execve(const char * filename,
if (retval < 0)
goto out;
- current->flags &= ~PF_KTHREAD;
retval = search_binary_handler(bprm,regs);
if (retval < 0)
goto out;
@@ -1459,127 +1464,148 @@ void set_binfmt(struct linux_binfmt *new)
EXPORT_SYMBOL(set_binfmt);
+/* Bump call_count and regrow cn->corename to match; 0 or -ENOMEM. */
+static int expand_corename(struct core_name *cn)
+{
+	char *prev = cn->corename;
+
+	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+	cn->corename = krealloc(prev, cn->size, GFP_KERNEL);
+	if (cn->corename)
+		return 0;
+
+	/* krealloc() leaves the old buffer alone on failure; free it here. */
+	kfree(prev);
+	return -ENOMEM;
+}
+
+/*
+ * cn_printf - append printf-style formatted text to cn->corename.
+ * Grows the buffer via expand_corename() until the text plus its NUL
+ * terminator fits.  Returns 0 on success or the expand_corename() error.
+ */
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+	va_list arg;
+	int need;
+	int ret;
+
+	/* Dry run: a NULL/0 vsnprintf() only reports the length needed. */
+	va_start(arg, fmt);
+	need = vsnprintf(NULL, 0, fmt, arg);
+	va_end(arg);
+
+	/* One expansion step may not be enough, so loop until it fits. */
+	while (unlikely(need >= cn->size - cn->used - 1)) {
+		ret = expand_corename(cn);
+		if (ret)
+			return ret;
+	}
+
+	va_start(arg, fmt);
+	vsnprintf(cn->corename + cn->used, need + 1, fmt, arg);
+	va_end(arg);
+	cn->used += need;
+	return 0;
+}
+
/* format_corename will inspect the pattern parameter, and output a
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
*/
-static int format_corename(char *corename, long signr)
+static int format_corename(struct core_name *cn, long signr)
{
const struct cred *cred = current_cred();
const char *pat_ptr = core_pattern;
int ispipe = (*pat_ptr == '|');
- char *out_ptr = corename;
- char *const out_end = corename + CORENAME_MAX_SIZE;
- int rc;
int pid_in_pattern = 0;
+ int err = 0;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+ cn->corename = kmalloc(cn->size, GFP_KERNEL);
+ cn->used = 0;
+
+ if (!cn->corename)
+ return -ENOMEM;
/* Repeat as long as we have more pattern to process and more output
space */
while (*pat_ptr) {
if (*pat_ptr != '%') {
- if (out_ptr == out_end)
+ if (*pat_ptr == 0)
goto out;
- *out_ptr++ = *pat_ptr++;
+ err = cn_printf(cn, "%c", *pat_ptr++);
} else {
switch (*++pat_ptr) {
+ /* single % at the end, drop that */
case 0:
goto out;
/* Double percent, output one percent */
case '%':
- if (out_ptr == out_end)
- goto out;
- *out_ptr++ = '%';
+ err = cn_printf(cn, "%c", '%');
break;
/* pid */
case 'p':
pid_in_pattern = 1;
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%d", task_tgid_vnr(current));
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
+ err = cn_printf(cn, "%d",
+ task_tgid_vnr(current));
break;
/* uid */
case 'u':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%d", cred->uid);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
+ err = cn_printf(cn, "%d", cred->uid);
break;
/* gid */
case 'g':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%d", cred->gid);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
+ err = cn_printf(cn, "%d", cred->gid);
break;
/* signal that caused the coredump */
case 's':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%ld", signr);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
+ err = cn_printf(cn, "%ld", signr);
break;
/* UNIX time of coredump */
case 't': {
struct timeval tv;
do_gettimeofday(&tv);
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%lu", tv.tv_sec);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
+ err = cn_printf(cn, "%lu", tv.tv_sec);
break;
}
/* hostname */
case 'h':
down_read(&uts_sem);
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%s", utsname()->nodename);
+ err = cn_printf(cn, "%s",
+ utsname()->nodename);
up_read(&uts_sem);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
break;
/* executable */
case 'e':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%s", current->comm);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
+ err = cn_printf(cn, "%s", current->comm);
break;
/* core limit size */
case 'c':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%lu", rlimit(RLIMIT_CORE));
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
+ err = cn_printf(cn, "%lu",
+ rlimit(RLIMIT_CORE));
break;
default:
break;
}
++pat_ptr;
}
+
+ if (err)
+ return err;
}
+
/* Backward compatibility with core_uses_pid:
*
* If core_pattern does not include a %p (as is the default)
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
if (!ispipe && !pid_in_pattern && core_uses_pid) {
- rc = snprintf(out_ptr, out_end - out_ptr,
- ".%d", task_tgid_vnr(current));
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
+ err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+ if (err)
+ return err;
}
out:
- *out_ptr = 0;
return ispipe;
}
@@ -1856,7 +1882,7 @@ static int umh_pipe_setup(struct subprocess_info *info)
void do_coredump(long signr, int exit_code, struct pt_regs *regs)
{
struct core_state core_state;
- char corename[CORENAME_MAX_SIZE + 1];
+ struct core_name cn;
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
const struct cred *old_cred;
@@ -1911,7 +1937,13 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
*/
clear_thread_flag(TIF_SIGPENDING);
- ispipe = format_corename(corename, signr);
+ ispipe = format_corename(&cn, signr);
+
+ if (ispipe == -ENOMEM) {
+ printk(KERN_WARNING "format_corename failed\n");
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_corename;
+ }
if (ispipe) {
int dump_count;
@@ -1948,7 +1980,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
goto fail_dropcount;
}
- helper_argv = argv_split(GFP_KERNEL, corename+1, NULL);
+ helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
if (!helper_argv) {
printk(KERN_WARNING "%s failed to allocate memory\n",
__func__);
@@ -1961,7 +1993,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
argv_free(helper_argv);
if (retval) {
printk(KERN_INFO "Core dump to %s pipe failed\n",
- corename);
+ cn.corename);
goto close_fail;
}
} else {
@@ -1970,7 +2002,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
- cprm.file = filp_open(corename,
+ cprm.file = filp_open(cn.corename,
O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
0600);
if (IS_ERR(cprm.file))
@@ -2012,6 +2044,8 @@ fail_dropcount:
if (ispipe)
atomic_dec(&core_dump_count);
fail_unlock:
+ kfree(cn.corename);
+fail_corename:
coredump_finish(mm);
revert_creds(old_cred);
fail_creds:
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 047e92fa3af8..79c3ae6e0456 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -659,19 +659,19 @@ free_bdi:
/*
* Set up the superblock (calls exofs_fill_super eventually)
*/
-static int exofs_get_sb(struct file_system_type *type,
+static struct dentry *exofs_mount(struct file_system_type *type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
struct exofs_mountopt opts;
int ret;
ret = parse_options(data, &opts);
if (ret)
- return ret;
+ return ERR_PTR(ret);
opts.dev_name = dev_name;
- return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt);
+ return mount_nodev(type, flags, &opts, exofs_fill_super);
}
/*
@@ -809,7 +809,7 @@ static const struct export_operations exofs_export_ops = {
static struct file_system_type exofs_type = {
.owner = THIS_MODULE,
.name = "exofs",
- .get_sb = exofs_get_sb,
+ .mount = exofs_mount,
.kill_sb = generic_shutdown_super,
};
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index c6c684b44ea1..0d06f4e75699 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -646,10 +646,9 @@ find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
return here;
}
-/*
+/**
* ext2_try_to_allocate()
* @sb: superblock
- * @handle: handle to this transaction
* @group: given allocation block group
* @bitmap_bh: bufferhead holds the block bitmap
* @grp_goal: given target block within the group
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0901320671da..d89e0b6a2d78 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1356,10 +1356,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
return 0;
}
-static int ext2_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext2_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
}
#ifdef CONFIG_QUOTA
@@ -1473,7 +1473,7 @@ out:
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
- .get_sb = ext2_get_sb,
+ .mount = ext2_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 4a32511f4ded..b3db22649426 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -792,9 +792,9 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
if (here < 0)
here = 0;
- p = ((char *)bh->b_data) + (here >> 3);
+ p = bh->b_data + (here >> 3);
r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
- next = (r - ((char *)bh->b_data)) << 3;
+ next = (r - bh->b_data) << 3;
if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
return next;
@@ -810,8 +810,9 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
/**
* claim_block()
+ * @lock: the spin lock for this block group
* @block: the free block (group relative) to allocate
- * @bh: the bufferhead containts the block group bitmap
+ * @bh: the buffer_head that contains the block group bitmap
*
* We think we can allocate this block in this bitmap. Try to set the bit.
* If that succeeds then check that nobody has allocated and then freed the
@@ -956,9 +957,11 @@ fail_access:
* but we will shift to the place where start_block is,
* then start from there, when looking for a reservable space.
*
- * @size: the target new reservation window size
+ * @my_rsv: the reservation window
*
- * @group_first_block: the first block we consider to start
+ * @sb: the super block
+ *
+ * @start_block: the first block we consider to start
* the real search from
*
* @last_block:
@@ -1084,7 +1087,7 @@ static int find_next_reservable_window(
*
* failed: we failed to find a reservation window in this group
*
- * @rsv: the reservation
+ * @my_rsv: the reservation window
*
* @grp_goal: The goal (group-relative). It is where the search for a
* free reservable space should start from.
@@ -1273,8 +1276,8 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
* @group: given allocation block group
* @bitmap_bh: bufferhead holds the block bitmap
* @grp_goal: given target block within the group
- * @count: target number of blocks to allocate
* @my_rsv: reservation window
+ * @count: target number of blocks to allocate
* @errp: pointer to store the error code
*
* This is the main function used to allocate a new block and its reservation
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 4ab72db3559e..9724aef22460 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -570,9 +570,14 @@ got:
ei->i_state_flags = 0;
ext3_set_inode_state(inode, EXT3_STATE_NEW);
- ei->i_extra_isize =
- (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
- sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
+ /* See comment in ext3_iget for explanation */
+ if (ino >= EXT3_FIRST_INO(sb) + 1 &&
+ EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) {
+ ei->i_extra_isize =
+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
+ } else {
+ ei->i_extra_isize = 0;
+ }
ret = inode;
dquot_initialize(inode);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad05353040a1..a9580617edd2 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -498,7 +498,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
}
/**
- * ext3_blks_to_allocate: Look up the block map and count the number
+ * ext3_blks_to_allocate - Look up the block map and count the number
* of direct blocks need to be allocated for the given branch.
*
* @branch: chain of indirect blocks
@@ -536,14 +536,18 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
}
/**
- * ext3_alloc_blocks: multiple allocate blocks needed for a branch
+ * ext3_alloc_blocks - multiple allocate blocks needed for a branch
+ * @handle: handle for this transaction
+ * @inode: owner
+ * @goal: preferred place for allocation
* @indirect_blks: the number of blocks need to allocate for indirect
* blocks
- *
+ * @blks: number of blocks that need to be allocated for direct blocks
* @new_blocks: on return it will store the new block numbers for
* the indirect blocks(if needed) and the first direct block,
- * @blks: on return it will store the total number of allocated
- * direct blocks
+ * @err: here we store the error value
+ *
+ * return the number of direct blocks allocated
*/
static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, int indirect_blks, int blks,
@@ -598,9 +602,11 @@ failed_out:
/**
* ext3_alloc_branch - allocate and set up a chain of blocks.
+ * @handle: handle for this transaction
* @inode: owner
* @indirect_blks: number of allocated indirect blocks
* @blks: number of allocated direct blocks
+ * @goal: preferred place for allocation
* @offsets: offsets (in the blocks) to store the pointers to next.
* @branch: place to store the chain in.
*
@@ -700,10 +706,9 @@ failed:
/**
* ext3_splice_branch - splice the allocated branch onto inode.
+ * @handle: handle for this transaction
* @inode: owner
* @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- * ext3_alloc_branch)
* @where: location of missing link
* @num: number of indirect blocks we are adding
* @blks: number of direct blocks we are adding
@@ -2530,7 +2535,6 @@ void ext3_truncate(struct inode *inode)
*/
} else {
/* Shared branch grows from an indirect block */
- BUFFER_TRACE(partial->bh, "get_write_access");
ext3_free_branches(handle, inode, partial->bh,
partial->p,
partial->p+1, (chain+n-1) - partial);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0ccd7b12b73c..e746d30b1232 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -977,7 +977,8 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
o_blocks_count = le32_to_cpu(es->s_blocks_count);
if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n",
+ printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
+ " upto "E3FSBLK" blocks\n",
o_blocks_count, n_blocks_count);
if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
@@ -985,7 +986,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
printk(KERN_ERR "EXT3-fs: filesystem on %s:"
- " too large to resize to %lu blocks safely\n",
+ " too large to resize to "E3FSBLK" blocks safely\n",
sb->s_id, n_blocks_count);
if (sizeof(sector_t) < 8)
ext3_warning(sb, __func__,
@@ -1065,11 +1066,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
- ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
- o_blocks_count + add);
+ ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
+ o_blocks_count, o_blocks_count + add);
ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
- ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count,
- o_blocks_count + add);
+ ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n",
+ o_blocks_count, o_blocks_count + add);
if ((err = ext3_journal_stop(handle)))
goto exit_put;
if (test_opt(sb, DEBUG))
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 377768009106..2fedaf8b5012 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1301,9 +1301,9 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
ext3_msg(sb, KERN_WARNING,
"warning: mounting fs with errors, "
"running e2fsck is recommended");
- else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
+ else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
le16_to_cpu(es->s_mnt_count) >=
- (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
+ le16_to_cpu(es->s_max_mnt_count))
ext3_msg(sb, KERN_WARNING,
"warning: maximal mount count reached, "
"running e2fsck is recommended");
@@ -1320,7 +1320,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
valid forever! :) */
es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
#endif
- if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
+ if (!le16_to_cpu(es->s_max_mnt_count))
es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
es->s_mtime = cpu_to_le32(get_seconds());
@@ -1647,7 +1647,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
* Note: s_es must be initialized as soon as possible because
* some ext3 macro-instructions depend on its value
*/
- es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
+ es = (struct ext3_super_block *) (bh->b_data + offset);
sbi->s_es = es;
sb->s_magic = le16_to_cpu(es->s_magic);
if (sb->s_magic != EXT3_SUPER_MAGIC)
@@ -1758,7 +1758,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
"error: can't read superblock on 2nd try");
goto failed_mount;
}
- es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
+ es = (struct ext3_super_block *)(bh->b_data + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
ext3_msg(sb, KERN_ERR,
@@ -1857,13 +1857,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_first_data_block) - 1)
/ EXT3_BLOCKS_PER_GROUP(sb)) + 1;
- db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
- EXT3_DESC_PER_BLOCK(sb);
+ db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
ext3_msg(sb, KERN_ERR,
"error: not enough memory");
+ ret = -ENOMEM;
goto failed_mount;
}
@@ -1951,6 +1951,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
}
if (err) {
ext3_msg(sb, KERN_ERR, "error: insufficient memory");
+ ret = err;
goto failed_mount3;
}
@@ -2159,7 +2160,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
goto out_bdev;
}
- es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
+ es = (struct ext3_super_block *) (bh->b_data + offset);
if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
!(le32_to_cpu(es->s_feature_incompat) &
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
@@ -2352,6 +2353,21 @@ static int ext3_commit_super(struct super_block *sb,
if (!sbh)
return error;
+
+ if (buffer_write_io_error(sbh)) {
+ /*
+ * Oh, dear. A previous attempt to write the
+ * superblock failed. This could happen because the
+ * USB device was yanked out. Or it could happen to
+ * be a transient write error and maybe the block will
+ * be remapped. Nothing we can do but to retry the
+ * write and hope for the best.
+ */
+ ext3_msg(sb, KERN_ERR, "previous I/O error to "
+ "superblock detected");
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ }
/*
* If the file system is mounted read-only, don't update the
* superblock write time. This avoids updating the superblock
@@ -2368,8 +2384,15 @@ static int ext3_commit_super(struct super_block *sb,
es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
BUFFER_TRACE(sbh, "marking dirty");
mark_buffer_dirty(sbh);
- if (sync)
+ if (sync) {
error = sync_dirty_buffer(sbh);
+ if (buffer_write_io_error(sbh)) {
+ ext3_msg(sb, KERN_ERR, "I/O error while writing "
+ "superblock");
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ }
+ }
return error;
}
@@ -2997,16 +3020,16 @@ out:
#endif
-static int ext3_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext3_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
}
static struct file_system_type ext3_fs_type = {
.owner = THIS_MODULE,
.name = "ext3",
- .get_sb = ext3_get_sb,
+ .mount = ext3_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8867b2a1e5fe..c947e36eda6c 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT4_FS) += ext4.o
-ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd30799a43ed..14c3af26c671 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* less than the blocksize * 8 ( which is the size
* of bitmap ), set rest of the block bitmap to 1
*/
- mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
+ ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
+ bh->b_data);
}
return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
}
@@ -489,7 +490,7 @@ error_return:
* Check if filesystem has nblocks free & available for allocation.
* On success return 1, return 0 on failure.
*/
-int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
+static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
{
s64 free_blocks, dirty_blocks, root_blocks;
struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 3db5084db9bd..fac90f3fba80 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -29,16 +29,15 @@ struct ext4_system_zone {
static struct kmem_cache *ext4_system_zone_cachep;
-int __init init_ext4_system_zone(void)
+int __init ext4_init_system_zone(void)
{
- ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
- SLAB_RECLAIM_ACCOUNT);
+ ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
if (ext4_system_zone_cachep == NULL)
return -ENOMEM;
return 0;
}
-void exit_ext4_system_zone(void)
+void ext4_exit_system_zone(void)
{
kmem_cache_destroy(ext4_system_zone_cachep);
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 374510f72baa..ece76fb6a40c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
struct file *filp);
const struct file_operations ext4_dir_operations = {
- .llseek = generic_file_llseek,
+ .llseek = ext4_llseek,
.read = generic_read_dir,
.readdir = ext4_readdir, /* we take BKL. needed?*/
.unlocked_ioctl = ext4_ioctl,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 889ec9d5e6ad..6a5edea2d70b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -168,7 +168,20 @@ struct mpage_da_data {
int pages_written;
int retval;
};
-#define EXT4_IO_UNWRITTEN 0x1
+
+/*
+ * Flags for ext4_io_end->flags
+ */
+#define EXT4_IO_END_UNWRITTEN 0x0001
+#define EXT4_IO_END_ERROR 0x0002
+
+struct ext4_io_page { /* element type of the pages[] array in ext4_io_end_t */
+ struct page *p_page;
+ atomic_t p_count; /* NOTE(review): looks like a reference count on this entry - confirm in page-io.c */
+};
+
+#define MAX_IO_PAGES 128
+
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
@@ -179,8 +192,18 @@ typedef struct ext4_io_end {
struct work_struct work; /* data work queue */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
+ int num_io_pages;
+ struct ext4_io_page *pages[MAX_IO_PAGES];
} ext4_io_end_t;
+struct ext4_io_submit { /* argument type of ext4_io_submit() and ext4_bio_write_page() */
+ int io_op; /* NOTE(review): presumably the block-layer op/flags for io_bio - confirm */
+ struct bio *io_bio;
+ ext4_io_end_t *io_end;
+ struct ext4_io_page *io_page;
+ sector_t io_next_block; /* NOTE(review): presumably the block expected next, for bio merging - confirm */
+};
+
/*
* Special inodes numbers
*/
@@ -205,6 +228,7 @@ typedef struct ext4_io_end {
#define EXT4_MIN_BLOCK_SIZE 1024
#define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10
+#define EXT4_MAX_BLOCK_LOG_SIZE 16
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
@@ -834,6 +858,7 @@ struct ext4_inode_info {
spinlock_t i_completed_io_lock;
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;
+ atomic_t i_ioend_count; /* Number of outstanding io_end structs */
/*
* Transactions that contain inode's metadata needed to complete
@@ -889,6 +914,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
+#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -1087,7 +1113,6 @@ struct ext4_sb_info {
struct completion s_kobj_unregister;
/* Journaling */
- struct inode *s_journal_inode;
struct journal_s *s_journal;
struct list_head s_orphan;
struct mutex s_orphan_lock;
@@ -1120,10 +1145,7 @@ struct ext4_sb_info {
/* for buddy allocator */
struct ext4_group_info ***s_group_info;
struct inode *s_buddy_cache;
- long s_blocks_reserved;
- spinlock_t s_reserve_lock;
spinlock_t s_md_lock;
- tid_t s_last_transaction;
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
@@ -1141,7 +1163,6 @@ struct ext4_sb_info {
unsigned long s_mb_last_start;
/* stats for buddy allocator */
- spinlock_t s_mb_pa_lock;
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */
@@ -1172,6 +1193,11 @@ struct ext4_sb_info {
/* timer for periodic error stats printing */
struct timer_list s_err_report;
+
+ /* Lazy inode table initialization info */
+ struct ext4_li_request *s_li_request;
+ /* Wait multiplier for lazy initialization thread */
+ unsigned int s_li_wait_mult;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1533,7 +1559,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
-extern struct proc_dir_entry *ext4_proc_root;
+/*
+ * Timeout and state flag for lazy initialization inode thread.
+ */
+#define EXT4_DEF_LI_WAIT_MULT 10
+#define EXT4_DEF_LI_MAX_START_DELAY 5
+#define EXT4_LAZYINIT_QUIT 0x0001
+#define EXT4_LAZYINIT_RUNNING 0x0002
+
+/*
+ * Lazy inode table initialization info
+ */
+struct ext4_lazy_init {
+ unsigned long li_state; /* presumably EXT4_LAZYINIT_* flag bits - confirm */
+
+ wait_queue_head_t li_wait_daemon;
+ wait_queue_head_t li_wait_task;
+ struct timer_list li_timer;
+ struct task_struct *li_task; /* presumably the lazy-init kernel thread - confirm */
+
+ struct list_head li_request_list; /* presumably chains ext4_li_request.lr_request nodes - confirm */
+ struct mutex li_list_mtx; /* presumably guards li_request_list - confirm */
+};
+
+struct ext4_li_request { /* ext4_sb_info.s_li_request points at one of these */
+ struct super_block *lr_super;
+ struct ext4_sb_info *lr_sbi;
+ ext4_group_t lr_next_group; /* presumably the next group to hand to ext4_init_inode_table() - confirm */
+ struct list_head lr_request; /* list node; presumably linked on ext4_lazy_init.li_request_list - confirm */
+ unsigned long lr_next_sched; /* NOTE(review): units not visible here (jiffies?) - confirm */
+ unsigned long lr_timeout;
+};
+
+struct ext4_features {
+ struct kobject f_kobj; /* embedded kobject; where it is registered is not visible in this header */
+ struct completion f_kobj_unregister; /* presumably completed when f_kobj is released, like s_kobj_unregister - confirm */
+};
/*
* Function prototypes
@@ -1561,7 +1622,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1605,11 +1665,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
extern unsigned long ext4_count_dirs(struct super_block *);
extern void ext4_check_inodes_bitmap(struct super_block *);
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
- struct buffer_head *bh,
- ext4_group_t group,
- struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+extern int ext4_init_inode_table(struct super_block *sb,
+ ext4_group_t group, int barrier);
/* mballoc.c */
extern long ext4_mb_stats;
@@ -1620,16 +1678,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
-extern int __init init_ext4_mballoc(void);
-extern void exit_ext4_mballoc(void);
+extern int __init ext4_init_mballoc(void);
+extern void ext4_exit_mballoc(void);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
-extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
-extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
- ext4_group_t, int);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+
/* inode.c */
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
@@ -1657,13 +1714,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
-extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
-extern int flush_completed_IO(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
/* ioctl.c */
@@ -1960,6 +2015,7 @@ extern const struct file_operations ext4_dir_operations;
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
+extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
@@ -1973,8 +2029,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
/* block_validity */
extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
-extern int __init init_ext4_system_zone(void);
-extern void exit_ext4_system_zone(void);
+extern int __init ext4_init_system_zone(void);
+extern void ext4_exit_system_zone(void);
extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
unsigned int count);
@@ -2002,6 +2058,18 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
+/* page-io.c */
+extern int __init ext4_init_pageio(void);
+extern void ext4_exit_pageio(void);
+extern void ext4_ioend_wait(struct inode *);
+extern void ext4_free_io_end(ext4_io_end_t *io);
+extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
+extern int ext4_end_io_nolock(ext4_io_end_t *io);
+extern void ext4_io_submit(struct ext4_io_submit *io);
+extern int ext4_bio_write_page(struct ext4_io_submit *io,
+ struct page *page,
+ int len,
+ struct writeback_control *wbc);
/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
enum ext4_state_bits {
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index bdb6ce7e2eb4..28ce70fd9cd0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
}
+/*
+ * ext4_ext_pblock:
+ * combine low and high parts of physical block number into ext4_fsblk_t
+ */
+static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
+{
+ ext4_fsblk_t block;
+
+ block = le32_to_cpu(ex->ee_start_lo);
+ block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
+ return block;
+}
+
+/*
+ * ext4_idx_pblock:
+ * combine low and high parts of a leaf physical block number into ext4_fsblk_t
+ */
+static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
+{
+ ext4_fsblk_t block;
+
+ block = le32_to_cpu(ix->ei_leaf_lo);
+ block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
+ return block;
+}
+
+/*
+ * ext4_ext_store_pblock:
+ * stores a large physical block number into an extent struct,
+ * breaking it into parts
+ */
+static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
+ ext4_fsblk_t pb)
+{
+ ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+ ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
+ 0xffff);
+}
+
+/*
+ * ext4_idx_store_pblock:
+ * stores a large physical block number into an index struct,
+ * breaking it into parts
+ */
+static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
+ ext4_fsblk_t pb)
+{
+ ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+ ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
+ 0xffff);
+}
+
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
sector_t lblocks);
-extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
-extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
-extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
extern int ext4_can_extents_be_merged(struct inode *inode,
struct ext4_extent *ex1,
struct ext4_extent *ex2);
-extern int ext4_ext_try_to_merge(struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *);
-extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
-extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
- ext_prepare_callback, void *);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *);
-extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
- ext4_lblk_t *, ext4_fsblk_t *);
-extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
- ext4_lblk_t *, ext4_fsblk_t *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 06328d3e5717..0554c48cb1fd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,55 +44,6 @@
#include "ext4_jbd2.h"
#include "ext4_extents.h"
-
-/*
- * ext_pblock:
- * combine low and high parts of physical block number into ext4_fsblk_t
- */
-ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
-{
- ext4_fsblk_t block;
-
- block = le32_to_cpu(ex->ee_start_lo);
- block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
- return block;
-}
-
-/*
- * idx_pblock:
- * combine low and high parts of a leaf physical block number into ext4_fsblk_t
- */
-ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
-{
- ext4_fsblk_t block;
-
- block = le32_to_cpu(ix->ei_leaf_lo);
- block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
- return block;
-}
-
-/*
- * ext4_ext_store_pblock:
- * stores a large physical block number into an extent struct,
- * breaking it into parts
- */
-void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
-{
- ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
- ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
-}
-
-/*
- * ext4_idx_store_pblock:
- * stores a large physical block number into an index struct,
- * breaking it into parts
- */
-static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
-{
- ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
- ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
-}
-
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
/* try to predict block placement */
ex = path[depth].p_ext;
if (ex)
- return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
+ return (ext4_ext_pblock(ex) +
+ (block - le32_to_cpu(ex->ee_block)));
/* it looks like index is empty;
* try to find starting block from index itself */
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
- ext4_fsblk_t block = ext_pblock(ext);
+ ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
static int ext4_valid_extent_idx(struct inode *inode,
struct ext4_extent_idx *ext_idx)
{
- ext4_fsblk_t block = idx_pblock(ext_idx);
+ ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
}
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
for (k = 0; k <= l; k++, path++) {
if (path->p_idx) {
ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
- idx_pblock(path->p_idx));
+ ext4_idx_pblock(path->p_idx));
} else if (path->p_ext) {
ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext),
- ext_pblock(path->p_ext));
+ ext4_ext_pblock(path->p_ext));
} else
ext_debug(" []");
}
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext_pblock(ex));
+ ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
}
ext_debug("\n");
}
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
path->p_idx = l - 1;
ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
- idx_pblock(path->p_idx));
+ ext4_idx_pblock(path->p_idx));
#ifdef CHECK_BINSEARCH
{
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode,
path->p_ext = l - 1;
ext_debug(" -> %d:%llu:[%d]%d ",
le32_to_cpu(path->p_ext->ee_block),
- ext_pblock(path->p_ext),
+ ext4_ext_pblock(path->p_ext),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext));
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
ext4_ext_binsearch_idx(inode, path + ppos, block);
- path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+ path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
path[ppos].p_depth = i;
path[ppos].p_ext = NULL;
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
ext4_ext_binsearch(inode, path + ppos, block);
/* if not an empty leaf */
if (path[ppos].p_ext)
- path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+ path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
ext4_ext_show_path(inode, path);
@@ -739,9 +691,9 @@ err:
* insert new index [@logical;@ptr] into the block at @curp;
* check where to insert: before @curp or after @curp
*/
-int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *curp,
- int logical, ext4_fsblk_t ptr)
+static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *curp,
+ int logical, ext4_fsblk_t ptr)
{
struct ext4_extent_idx *ix;
int len, err;
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
EXT_MAX_EXTENT(path[depth].p_hdr)) {
ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
le32_to_cpu(path[depth].p_ext->ee_block),
- ext_pblock(path[depth].p_ext),
+ ext4_ext_pblock(path[depth].p_ext),
ext4_ext_is_uninitialized(path[depth].p_ext),
ext4_ext_get_actual_len(path[depth].p_ext),
newblock);
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
ext_debug("%d: move %d:%llu in new index %llu\n", i,
le32_to_cpu(path[i].p_idx->ei_block),
- idx_pblock(path[i].p_idx),
+ ext4_idx_pblock(path[i].p_idx),
newblock);
/*memmove(++fidx, path[i].p_idx++,
sizeof(struct ext4_extent_idx));
@@ -1146,7 +1098,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
- idx_pblock(EXT_FIRST_INDEX(neh)));
+ ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
neh->eh_depth = cpu_to_le16(path->p_depth + 1);
err = ext4_ext_dirty(handle, inode, curp);
@@ -1232,9 +1184,9 @@ out:
* returns 0 at @phys
* return value contains 0 (success) or error code
*/
-int
-ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t *logical, ext4_fsblk_t *phys)
+static int ext4_ext_search_left(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
@@ -1286,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
}
*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
- *phys = ext_pblock(ex) + ee_len - 1;
+ *phys = ext4_ext_pblock(ex) + ee_len - 1;
return 0;
}
@@ -1297,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
* returns 0 at @phys
* return value contains 0 (success) or error code
*/
-int
-ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t *logical, ext4_fsblk_t *phys)
+static int ext4_ext_search_right(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
struct buffer_head *bh = NULL;
struct ext4_extent_header *eh;
@@ -1342,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
}
}
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
return 0;
}
@@ -1357,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
/* next allocated block in this leaf */
ex++;
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
return 0;
}
@@ -1376,7 +1328,7 @@ got_index:
* follow it and find the closest allocated
* block to the right */
ix++;
- block = idx_pblock(ix);
+ block = ext4_idx_pblock(ix);
while (++depth < path->p_depth) {
bh = sb_bread(inode->i_sb, block);
if (bh == NULL)
@@ -1388,7 +1340,7 @@ got_index:
return -EIO;
}
ix = EXT_FIRST_INDEX(eh);
- block = idx_pblock(ix);
+ block = ext4_idx_pblock(ix);
put_bh(bh);
}
@@ -1402,7 +1354,7 @@ got_index:
}
ex = EXT_FIRST_EXTENT(eh);
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
put_bh(bh);
return 0;
}
@@ -1573,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
return 0;
#endif
- if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
+ if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
return 1;
return 0;
}
@@ -1585,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
* Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
* 1 if they got merged.
*/
-int ext4_ext_try_to_merge(struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *ex)
+static int ext4_ext_try_to_merge(struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *ex)
{
struct ext4_extent_header *eh;
unsigned int depth, len;
@@ -1632,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
* such that there will be no overlap, and then returns 1.
* If there is no overlap found, it returns 0.
*/
-unsigned int ext4_ext_check_overlap(struct inode *inode,
- struct ext4_extent *newext,
- struct ext4_ext_path *path)
+static unsigned int ext4_ext_check_overlap(struct inode *inode,
+ struct ext4_extent *newext,
+ struct ext4_ext_path *path)
{
ext4_lblk_t b1, b2;
unsigned int depth, len1;
@@ -1706,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
&& ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
- ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
- le32_to_cpu(ex->ee_block),
- ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext_pblock(ex));
+ ext4_ext_is_uninitialized(newext),
+ ext4_ext_get_actual_len(newext),
+ le32_to_cpu(ex->ee_block),
+ ext4_ext_is_uninitialized(ex),
+ ext4_ext_get_actual_len(ex),
+ ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
return err;
@@ -1780,7 +1733,7 @@ has_space:
/* there is no extent in this leaf, create first one */
ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext));
path[depth].p_ext = EXT_FIRST_EXTENT(eh);
@@ -1794,7 +1747,7 @@ has_space:
ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
@@ -1808,7 +1761,7 @@ has_space:
ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
@@ -1819,7 +1772,7 @@ has_space:
le16_add_cpu(&eh->eh_entries, 1);
nearex = path[depth].p_ext;
nearex->ee_block = newext->ee_block;
- ext4_ext_store_pblock(nearex, ext_pblock(newext));
+ ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
nearex->ee_len = newext->ee_len;
merge:
@@ -1845,9 +1798,9 @@ cleanup:
return err;
}
-int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
- ext4_lblk_t num, ext_prepare_callback func,
- void *cbdata)
+static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
+ ext4_lblk_t num, ext_prepare_callback func,
+ void *cbdata)
{
struct ext4_ext_path *path = NULL;
struct ext4_ext_cache cbex;
@@ -1923,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
cbex.ec_len = ext4_ext_get_actual_len(ex);
- cbex.ec_start = ext_pblock(ex);
+ cbex.ec_start = ext4_ext_pblock(ex);
cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
@@ -2073,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
/* free index block */
path--;
- leaf = idx_pblock(path->p_idx);
+ leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
return -EIO;
@@ -2181,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t start;
num = le32_to_cpu(ex->ee_block) + ee_len - from;
- start = ext_pblock(ex) + ee_len - num;
+ start = ext4_ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
ext4_free_blocks(handle, inode, 0, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
@@ -2310,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
goto out;
ext_debug("new extent: %u:%u:%llu\n", block, num,
- ext_pblock(ex));
+ ext4_ext_pblock(ex));
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2421,9 +2374,9 @@ again:
struct buffer_head *bh;
/* go to the next level */
ext_debug("move to level %d (block %llu)\n",
- i + 1, idx_pblock(path[i].p_idx));
+ i + 1, ext4_idx_pblock(path[i].p_idx));
memset(path + i + 1, 0, sizeof(*path));
- bh = sb_bread(sb, idx_pblock(path[i].p_idx));
+ bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
if (!bh) {
/* should we reset i_size? */
err = -EIO;
@@ -2535,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb)
#endif
}
-static void bi_complete(struct bio *bio, int error)
-{
- complete((struct completion *)bio->bi_private);
-}
-
/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
+ ext4_fsblk_t ee_pblock;
+ unsigned int ee_len;
int ret;
- struct bio *bio;
- int blkbits, blocksize;
- sector_t ee_pblock;
- struct completion event;
- unsigned int ee_len, len, done, offset;
-
- blkbits = inode->i_blkbits;
- blocksize = inode->i_sb->s_blocksize;
ee_len = ext4_ext_get_actual_len(ex);
- ee_pblock = ext_pblock(ex);
-
- /* convert ee_pblock to 512 byte sectors */
- ee_pblock = ee_pblock << (blkbits - 9);
-
- while (ee_len > 0) {
-
- if (ee_len > BIO_MAX_PAGES)
- len = BIO_MAX_PAGES;
- else
- len = ee_len;
-
- bio = bio_alloc(GFP_NOIO, len);
- if (!bio)
- return -ENOMEM;
-
- bio->bi_sector = ee_pblock;
- bio->bi_bdev = inode->i_sb->s_bdev;
-
- done = 0;
- offset = 0;
- while (done < len) {
- ret = bio_add_page(bio, ZERO_PAGE(0),
- blocksize, offset);
- if (ret != blocksize) {
- /*
- * We can't add any more pages because of
- * hardware limitations. Start a new bio.
- */
- break;
- }
- done++;
- offset += blocksize;
- if (offset >= PAGE_CACHE_SIZE)
- offset = 0;
- }
+ ee_pblock = ext4_ext_pblock(ex);
- init_completion(&event);
- bio->bi_private = &event;
- bio->bi_end_io = bi_complete;
- submit_bio(WRITE, bio);
- wait_for_completion(&event);
+ ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
+ if (ret > 0)
+ ret = 0;
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
- bio_put(bio);
- return -EIO;
- }
- bio_put(bio);
- ee_len -= done;
- ee_pblock += done << (blkbits - 9);
- }
- return 0;
+ return ret;
}
#define EXT4_EXT_ZERO_LEN 7
@@ -2651,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
- newblock = map->m_lblk - ee_block + ext_pblock(ex);
+ newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
- ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
/*
* It is safe to convert extent to initialized via explicit
@@ -2675,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
return allocated;
@@ -2710,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_block = orig_ex.ee_block;
ex->ee_len = cpu_to_le16(ee_len - allocated);
ext4_ext_mark_uninitialized(ex);
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
ex3 = &newex;
@@ -2725,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto fix_extent_len;
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex,
+ ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* blocks available from map->m_lblk */
return allocated;
@@ -2782,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
/* blocks available from map->m_lblk */
@@ -2833,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
/* blocks available from map->m_lblk */
@@ -2902,7 +2800,7 @@ insert:
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
return allocated;
@@ -2915,7 +2813,7 @@ out:
fix_extent_len:
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
return err;
@@ -2973,12 +2871,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
- newblock = map->m_lblk - ee_block + ext_pblock(ex);
+ newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
- ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
/*
* It is safe to convert extent to initialized via explicit
@@ -3027,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
/* blocks available from map->m_lblk */
@@ -3099,7 +2997,7 @@ insert:
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
return allocated;
@@ -3112,7 +3010,7 @@ out:
fix_extent_len:
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
return err;
@@ -3180,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
unmap_underlying_metadata(bdev, block + i);
}
+/*
+ * Handle EOFBLOCKS_FL flag, clearing it if necessary
+ */
+static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path,
+ unsigned int len)
+{
+ int i, depth;
+ struct ext4_extent_header *eh;
+ struct ext4_extent *ex, *last_ex;
+
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
+ return 0;
+
+ depth = ext_depth(inode);
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+
+ if (unlikely(!eh->eh_entries)) {
+ EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
+ "EOFBLOCKS_FL set");
+ return -EIO;
+ }
+ last_ex = EXT_LAST_EXTENT(eh);
+ /*
+ * We should clear the EOFBLOCKS_FL flag if we are writing the
+ * last block in the last extent in the file. We test this by
+ * first checking to see if the caller to
+ * ext4_ext_get_blocks() was interested in the last block (or
+ * a block beyond the last block) in the current extent. If
+ * this turns out to be false, we can bail out from this
+ * function immediately.
+ */
+ if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+ ext4_ext_get_actual_len(last_ex))
+ return 0;
+ /*
+ * If the caller does appear to be planning to write at or
+ * beyond the end of the current extent, we then test to see
+ * if the current extent is the last extent in the file, by
+ * checking to make sure it was reached via the rightmost node
+ * at each level of the tree.
+ */
+ for (i = depth-1; i >= 0; i--)
+ if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
+ return 0;
+ ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+ return ext4_mark_inode_dirty(handle, inode);
+}
+
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
@@ -3206,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
* completed
*/
if (io)
- io->flag = EXT4_IO_UNWRITTEN;
+ io->flag = EXT4_IO_END_UNWRITTEN;
else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
if (ext4_should_dioread_nolock(inode))
@@ -3217,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
ret = ext4_convert_unwritten_extents_endio(handle, inode,
path);
- if (ret >= 0)
+ if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map, path,
+ map->m_len);
+ } else
+ err = ret;
goto out2;
}
/* buffered IO case */
@@ -3244,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
/* buffered write, writepage time, convert*/
ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
- if (ret >= 0)
+ if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+ if (err < 0)
+ goto out2;
+ }
+
out:
if (ret <= 0) {
err = ret;
@@ -3292,6 +3250,7 @@ out2:
}
return err ? err : allocated;
}
+
/*
* Block allocation/map/preallocation routine for extents based files
*
@@ -3315,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
- struct ext4_extent newex, *ex, *last_ex;
+ struct ext4_extent newex, *ex;
ext4_fsblk_t newblock;
- int i, err = 0, depth, ret, cache_type;
+ int err = 0, depth, ret, cache_type;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3341,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/* block is already allocated */
newblock = map->m_lblk
- le32_to_cpu(newex.ee_block)
- + ext_pblock(&newex);
+ + ext4_ext_pblock(&newex);
/* number of remaining blocks in the extent */
allocated = ext4_ext_get_actual_len(&newex) -
(map->m_lblk - le32_to_cpu(newex.ee_block));
@@ -3379,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ex = path[depth].p_ext;
if (ex) {
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t ee_start = ext_pblock(ex);
+ ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
unsigned short ee_len;
/*
@@ -3488,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
if (io)
- io->flag = EXT4_IO_UNWRITTEN;
+ io->flag = EXT4_IO_END_UNWRITTEN;
else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
@@ -3497,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_flags |= EXT4_MAP_UNINIT;
}
- if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {
- if (unlikely(!eh->eh_entries)) {
- EXT4_ERROR_INODE(inode,
- "eh->eh_entries == 0 and "
- "EOFBLOCKS_FL set");
- err = -EIO;
- goto out2;
- }
- last_ex = EXT_LAST_EXTENT(eh);
- /*
- * If the current leaf block was reached by looking at
- * the last index block all the way down the tree, and
- * we are extending the inode beyond the last extent
- * in the current leaf block, then clear the
- * EOFBLOCKS_FL flag.
- */
- for (i = depth-1; i >= 0; i--) {
- if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
- break;
- }
- if ((i < 0) &&
- (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
- ext4_ext_get_actual_len(last_ex)))
- ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
- }
+ err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+ if (err)
+ goto out2;
+
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err) {
/* free data blocks we just allocated */
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+ ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
/* previous routine could use block we allocated */
- newblock = ext_pblock(&newex);
+ newblock = ext4_ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
if (allocated > map->m_len)
allocated = map->m_len;
@@ -3729,7 +3667,7 @@ retry:
printk(KERN_ERR "%s: ext4_ext_map_blocks "
"returned error inode#%lu, block=%u, "
"max_blocks=%u", __func__,
- inode->i_ino, block, max_blocks);
+ inode->i_ino, map.m_lblk, max_blocks);
#endif
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ee92b66d4558..5a5c55ddceef 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
return dquot_file_open(inode, filp);
}
+/*
+ * ext4_llseek() is a copy of generic_file_llseek() that differs only in
+ * the upper bound applied to the resulting offset: inodes without the
+ * EXT4_INODE_EXTENTS flag (block-mapped) are limited to the superblock's
+ * s_bitmap_maxbytes, extent-mapped inodes to s_maxbytes.  Apart from
+ * that it should stay identical to generic_file_llseek().
+ *
+ * SEEK_END is relative to i_size and SEEK_CUR to the current f_pos; any
+ * other origin leaves @offset untouched, i.e. treats it as absolute.
+ *
+ * Returns the resulting file position, or -EINVAL when the computed
+ * offset is negative or larger than the limit for this inode.
+ */
+loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct inode *inode = file->f_mapping->host;
+	loff_t limit;
+
+	/* Pick the size limit matching the inode's mapping scheme. */
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		limit = inode->i_sb->s_maxbytes;
+	else
+		limit = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
+
+	mutex_lock(&inode->i_mutex);
+	if (origin == SEEK_END) {
+		offset += inode->i_size;
+	} else if (origin == SEEK_CUR) {
+		/* lseek(fd, 0, SEEK_CUR) only queries the position. */
+		if (offset == 0) {
+			mutex_unlock(&inode->i_mutex);
+			return file->f_pos;
+		}
+		offset += file->f_pos;
+	}
+
+	if (offset < 0 || offset > limit) {
+		mutex_unlock(&inode->i_mutex);
+		return -EINVAL;
+	}
+
+	/* Only touch the file when the position really changes. */
+	if (file->f_pos != offset) {
+		file->f_pos = offset;
+		file->f_version = 0;
+	}
+	mutex_unlock(&inode->i_mutex);
+
+	return offset;
+}
+
const struct file_operations ext4_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = ext4_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 3f3ff5ee8f9d..c1a7bc923cf6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,6 +34,89 @@
#include <trace/events/ext4.h>
+/*
+ * Debugging aid: print every ext4_io_end_t queued on the inode's
+ * i_completed_io_list together with its list neighbours.  Compiles to an
+ * empty function unless EXT4_DEBUG is defined.
+ */
+static void dump_completed_IO(struct inode *inode)
+{
+#ifdef EXT4_DEBUG
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	ext4_io_end_t *io, *prev_io, *next_io;
+	unsigned long flags;
+
+	if (list_empty(&ei->i_completed_io_list)) {
+		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
+		return;
+	}
+
+	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	list_for_each_entry(io, &ei->i_completed_io_list, list) {
+		/*
+		 * The neighbour pointers are only printed, never
+		 * dereferenced; for the first/last entry they are derived
+		 * from the list head rather than from a real io_end.
+		 */
+		prev_io = list_entry(io->list.prev, ext4_io_end_t, list);
+		next_io = list_entry(io->list.next, ext4_io_end_t, list);
+
+		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
+			   io, inode->i_ino, prev_io, next_io);
+	}
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+#endif
+}
+
+/*
+ * This function is called from ext4_sync_file().
+ *
+ * When IO is completed, the work to convert unwritten extents to
+ * written is queued on a workqueue but may not get scheduled
+ * immediately.  When fsync is called, we need to ensure the
+ * conversion is complete before fsync returns.
+ * The inode keeps track of a list of pending/completed IO that
+ * might need the conversion.  This function walks through the list
+ * and converts the related unwritten extents for completed IO to
+ * written.
+ *
+ * Returns 0 once every entry on the list has been converted (or the
+ * list was empty), or the negative error reported by
+ * ext4_end_io_nolock() for the first entry that could not be converted.
+ * Entries that were not converted remain on the list.
+ */
+static int flush_completed_IO(struct inode *inode)
+{
+	ext4_io_end_t *io;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned long flags;
+	int ret = 0;
+
+	if (list_empty(&ei->i_completed_io_list))
+		return 0;
+
+	dump_completed_IO(inode);
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	while (!list_empty(&ei->i_completed_io_list)) {
+		io = list_entry(ei->i_completed_io_list.next,
+				ext4_io_end_t, list);
+		/*
+		 * Call ext4_end_io_nolock() to convert completed
+		 * IO to written.
+		 *
+		 * When ext4_sync_file() is called, run_queue() may already
+		 * be about to flush the work corresponding to this io
+		 * structure.  It will be upset if it finds that the io
+		 * structure related to the work to be scheduled is freed.
+		 *
+		 * Thus we need to keep the io structure valid here after
+		 * the conversion has finished.  The io structure has a flag
+		 * to avoid double conversion from both fsync and the
+		 * background work queue.
+		 *
+		 * The list lock is dropped around the conversion and
+		 * re-taken before the list is touched again.
+		 */
+		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+		ret = ext4_end_io_nolock(io);
+		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+		/*
+		 * A failed entry is left on the list.  Stop here: looping
+		 * again would pick the very same entry from the list head
+		 * and spin forever if the failure is persistent.
+		 */
+		if (ret < 0)
+			break;
+		list_del_init(&io->list);
+	}
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+	return (ret < 0) ? ret : 0;
+}
+
/*
* If we're not journaling and this is a just-created file, we have to
* sync our parent directory (if it was freshly created) since
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 45853e0d1f21..1ce240a23ebb 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -50,7 +50,7 @@
* need to use it within a single byte (to ensure we get endianness right).
* We can use memset for the rest of the bitmap as there are no other users.
*/
-void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
int i;
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
}
/* Initializes an uninitialized inode bitmap */
-unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp)
+static unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh,
+ ext4_group_t block_group,
+ struct ext4_group_desc *gdp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
}
memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+ ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
return EXT4_INODES_PER_GROUP(sb);
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
+
bitmap_blk = ext4_inode_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
unlock_buffer(bh);
return bh;
}
+
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
ext4_unlock_group(sb, block_group);
+
if (buffer_uptodate(bh)) {
/*
* if not uninit if bh is uptodate,
@@ -411,8 +415,8 @@ struct orlov_stats {
* for a particular block group or flex_bg. If flex_size is 1, then g
* is a block group number; otherwise it is flex_bg number.
*/
-void get_orlov_stats(struct super_block *sb, ext4_group_t g,
- int flex_size, struct orlov_stats *stats)
+static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
+ int flex_size, struct orlov_stats *stats)
{
struct ext4_group_desc *desc;
struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
{
int free = 0, retval = 0, count;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ /*
+ * We have to be sure that new inode allocation does not race with
+ * inode table initialization, because otherwise we may end up
+ * allocating and writing new inode right before sb_issue_zeroout
+ * takes place and overwriting our new inode with zeroes. So we
+ * take alloc_sem to prevent it.
+ */
+ down_read(&grp->alloc_sem);
ext4_lock_group(sb, group);
if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
/* not a free inode */
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
ino > EXT4_INODES_PER_GROUP(sb)) {
ext4_unlock_group(sb, group);
+ up_read(&grp->alloc_sem);
ext4_error(sb, "reserved inode or inode > inodes count - "
"block_group = %u, inode=%lu", group,
ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
err_ret:
ext4_unlock_group(sb, group);
+ up_read(&grp->alloc_sem);
return retval;
}
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
}
return count;
}
+
+/*
+ * Zeroes not yet zeroed inode table - just write zeroes through the whole
+ * inode table. Must be called without any spinlock held. The only place
+ * where it is called from on active part of filesystem is ext4lazyinit
+ * thread, so we do not need any special locks, however we have to prevent
+ * inode allocation from the current group, so we take alloc_sem lock, to
+ * block ext4_claim_inode until we are finished.
+ *
+ * @sb:      superblock of the filesystem
+ * @group:   block group whose inode table is to be zeroed
+ * @barrier: when non-zero, flush the block device after the zeroout
+ *
+ * Returns 0 when the table was zeroed and flagged, or when there was
+ * nothing to do (no group descriptor, or EXT4_BG_INODE_ZEROED already
+ * set); 1 when the filesystem is read-only or the descriptor's unused
+ * inode count is inconsistent; otherwise the error returned by the
+ * journal or zeroout helper that failed.
+ */
+int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+			  int barrier)
+{
+	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp = NULL;
+	struct buffer_head *group_desc_bh;
+	handle_t *handle;
+	ext4_fsblk_t blk;
+	int num, ret = 0, used_blks = 0;
+
+	/* This should not happen, but just to be sure check this */
+	if (sb->s_flags & MS_RDONLY) {
+		ret = 1;
+		goto out;
+	}
+
+	gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+	if (!gdp)
+		goto out;
+
+	/*
+	 * We do not need to lock this, because we are the only one
+	 * handling this flag.
+	 */
+	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+		goto out;
+
+	handle = ext4_journal_start_sb(sb, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
+
+	/*
+	 * From here on every exit must go through err_out so that
+	 * alloc_sem is released and the journal handle is stopped.
+	 */
+	down_write(&grp->alloc_sem);
+	/*
+	 * If inode bitmap was already initialized there may be some
+	 * used inodes so we need to skip blocks with used inodes in
+	 * inode table.
+	 */
+	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+		used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
+					  ext4_itable_unused_count(sb, gdp)),
+					 sbi->s_inodes_per_block);
+
+	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+		ext4_error(sb, "Something is wrong with group %u: "
+			   "used itable blocks: %d; "
+			   "itable unused count: %u",
+			   group, used_blks,
+			   ext4_itable_unused_count(sb, gdp));
+		ret = 1;
+		goto err_out;
+	}
+
+	blk = ext4_inode_table(sb, gdp) + used_blks;
+	num = sbi->s_itb_per_group - used_blks;
+
+	BUFFER_TRACE(group_desc_bh, "get_write_access");
+	ret = ext4_journal_get_write_access(handle,
+					    group_desc_bh);
+	if (ret)
+		goto err_out;
+
+	/*
+	 * Skip zeroout if the inode table is full. But we set the ZEROED
+	 * flag anyway, because obviously, when it is full it does not need
+	 * further zeroing.
+	 */
+	if (unlikely(num == 0))
+		goto skip_zeroout;
+
+	ext4_debug("going to zero out inode table in group %d\n",
+		   group);
+	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
+	if (ret < 0)
+		goto err_out;
+	if (barrier)
+		blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
+
+skip_zeroout:
+	/* The descriptor flags and checksum are updated under the group lock. */
+	ext4_lock_group(sb, group);
+	gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
+	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+	ext4_unlock_group(sb, group);
+
+	BUFFER_TRACE(group_desc_bh,
+		     "call ext4_handle_dirty_metadata");
+	ret = ext4_handle_dirty_metadata(handle, NULL,
+					 group_desc_bh);
+
+err_out:
+	up_write(&grp->alloc_sem);
+	ext4_journal_stop(handle);
+out:
+	return ret;
+}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 49635ef236f8..bdbe69902207 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -53,6 +53,7 @@
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
+ trace_ext4_begin_ordered_truncate(inode, new_size);
return jbd2_journal_begin_ordered_truncate(
EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode,
@@ -60,6 +61,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+static int __ext4_journalled_writepage(struct page *page, unsigned int len);
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
/*
* Test whether an inode is a fast symlink.
@@ -172,6 +179,7 @@ void ext4_evict_inode(struct inode *inode)
handle_t *handle;
int err;
+ trace_ext4_evict_inode(inode);
if (inode->i_nlink) {
truncate_inode_pages(&inode->i_data, 0);
goto no_delete;
@@ -755,6 +763,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
* parent to disk.
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+ if (unlikely(!bh)) {
+ err = -EIO;
+ goto failed;
+ }
+
branch[n].bh = bh;
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
@@ -1207,8 +1220,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
break;
idx++;
num++;
- if (num >= max_pages)
+ if (num >= max_pages) {
+ done = 1;
break;
+ }
}
pagevec_release(&pvec);
}
@@ -1995,16 +2010,23 @@ static void ext4_da_page_release_reservation(struct page *page,
*
* As pages are already locked by write_cache_pages(), we can't use it
*/
-static int mpage_da_submit_io(struct mpage_da_data *mpd)
+static int mpage_da_submit_io(struct mpage_da_data *mpd,
+ struct ext4_map_blocks *map)
{
- long pages_skipped;
struct pagevec pvec;
unsigned long index, end;
int ret = 0, err, nr_pages, i;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
+ loff_t size = i_size_read(inode);
+ unsigned int len, block_start;
+ struct buffer_head *bh, *page_bufs = NULL;
+ int journal_data = ext4_should_journal_data(inode);
+ sector_t pblock = 0, cur_logical = 0;
+ struct ext4_io_submit io_submit;
BUG_ON(mpd->next_page <= mpd->first_page);
+ memset(&io_submit, 0, sizeof(io_submit));
/*
* We need to start from the first_page to the next_page - 1
* to make sure we also write the mapped dirty buffer_heads.
@@ -2020,122 +2042,108 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
+ int commit_write = 0, redirty_page = 0;
struct page *page = pvec.pages[i];
index = page->index;
if (index > end)
break;
+
+ if (index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
+ if (map) {
+ cur_logical = index << (PAGE_CACHE_SHIFT -
+ inode->i_blkbits);
+ pblock = map->m_pblk + (cur_logical -
+ map->m_lblk);
+ }
index++;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
- pages_skipped = mpd->wbc->pages_skipped;
- err = mapping->a_ops->writepage(page, mpd->wbc);
- if (!err && (pages_skipped == mpd->wbc->pages_skipped))
- /*
- * have successfully written the page
- * without skipping the same
- */
- mpd->pages_written++;
/*
- * In error case, we have to continue because
- * remaining pages are still locked
- * XXX: unlock and re-dirty them?
+ * If the page does not have buffers (for
+ * whatever reason), try to create them using
+ * __block_write_begin. If this fails,
+ * redirty the page and move on.
*/
- if (ret == 0)
- ret = err;
- }
- pagevec_release(&pvec);
- }
- return ret;
-}
-
-/*
- * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
- *
- * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
- */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
- struct ext4_map_blocks *map)
-{
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;
- int blocks = map->m_len;
- sector_t pblock = map->m_pblk, cur_logical;
- struct buffer_head *head, *bh;
- pgoff_t index, end;
- struct pagevec pvec;
- int nr_pages, i;
-
- index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- pagevec_init(&pvec, 0);
-
- while (index <= end) {
- /* XXX: optimize tail */
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- index = page->index;
- if (index > end)
- break;
- index++;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
- BUG_ON(!page_has_buffers(page));
-
- bh = page_buffers(page);
- head = bh;
-
- /* skip blocks out of the range */
- do {
- if (cur_logical >= map->m_lblk)
- break;
- cur_logical++;
- } while ((bh = bh->b_this_page) != head);
+ if (!page_has_buffers(page)) {
+ if (__block_write_begin(page, 0, len,
+ noalloc_get_block_write)) {
+ redirty_page:
+ redirty_page_for_writepage(mpd->wbc,
+ page);
+ unlock_page(page);
+ continue;
+ }
+ commit_write = 1;
+ }
+ bh = page_bufs = page_buffers(page);
+ block_start = 0;
do {
- if (cur_logical >= map->m_lblk + blocks)
- break;
-
- if (buffer_delay(bh) || buffer_unwritten(bh)) {
-
- BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-
+ if (!bh)
+ goto redirty_page;
+ if (map && (cur_logical >= map->m_lblk) &&
+ (cur_logical <= (map->m_lblk +
+ (map->m_len - 1)))) {
if (buffer_delay(bh)) {
clear_buffer_delay(bh);
bh->b_blocknr = pblock;
- } else {
- /*
- * unwritten already should have
- * blocknr assigned. Verify that
- */
- clear_buffer_unwritten(bh);
- BUG_ON(bh->b_blocknr != pblock);
}
+ if (buffer_unwritten(bh) ||
+ buffer_mapped(bh))
+ BUG_ON(bh->b_blocknr != pblock);
+ if (map->m_flags & EXT4_MAP_UNINIT)
+ set_buffer_uninit(bh);
+ clear_buffer_unwritten(bh);
+ }
- } else if (buffer_mapped(bh))
- BUG_ON(bh->b_blocknr != pblock);
-
- if (map->m_flags & EXT4_MAP_UNINIT)
- set_buffer_uninit(bh);
+ /* redirty page if block allocation undone */
+ if (buffer_delay(bh) || buffer_unwritten(bh))
+ redirty_page = 1;
+ bh = bh->b_this_page;
+ block_start += bh->b_size;
cur_logical++;
pblock++;
- } while ((bh = bh->b_this_page) != head);
+ } while (bh != page_bufs);
+
+ if (redirty_page)
+ goto redirty_page;
+
+ if (commit_write)
+ /* mark the buffer_heads as dirty & uptodate */
+ block_commit_write(page, 0, len);
+
+ /*
+ * Delalloc doesn't support data journalling,
+ * but eventually maybe we'll lift this
+ * restriction.
+ */
+ if (unlikely(journal_data && PageChecked(page)))
+ err = __ext4_journalled_writepage(page, len);
+ else
+ err = ext4_bio_write_page(&io_submit, page,
+ len, mpd->wbc);
+
+ if (!err)
+ mpd->pages_written++;
+ /*
+ * In error case, we have to continue because
+ * remaining pages are still locked
+ */
+ if (ret == 0)
+ ret = err;
}
pagevec_release(&pvec);
}
+ ext4_io_submit(&io_submit);
+ return ret;
}
-
static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
sector_t logical, long blk_cnt)
{
@@ -2187,35 +2195,32 @@ static void ext4_print_free_blocks(struct inode *inode)
}
/*
- * mpage_da_map_blocks - go through given space
+ * mpage_da_map_and_submit - go through given space, map them
+ * if necessary, and then submit them for I/O
*
* @mpd - bh describing space
*
* The function skips space we know is already mapped to disk blocks.
*
*/
-static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
{
int err, blks, get_blocks_flags;
- struct ext4_map_blocks map;
+ struct ext4_map_blocks map, *mapp = NULL;
sector_t next = mpd->b_blocknr;
unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
handle_t *handle = NULL;
/*
- * We consider only non-mapped and non-allocated blocks
+ * If the blocks are mapped already, or we couldn't accumulate
+ * any blocks, then proceed immediately to the submission stage.
*/
- if ((mpd->b_state & (1 << BH_Mapped)) &&
- !(mpd->b_state & (1 << BH_Delay)) &&
- !(mpd->b_state & (1 << BH_Unwritten)))
- return 0;
-
- /*
- * If we didn't accumulate anything to write simply return
- */
- if (!mpd->b_size)
- return 0;
+ if ((mpd->b_size == 0) ||
+ ((mpd->b_state & (1 << BH_Mapped)) &&
+ !(mpd->b_state & (1 << BH_Delay)) &&
+ !(mpd->b_state & (1 << BH_Unwritten))))
+ goto submit_io;
handle = ext4_journal_current_handle();
BUG_ON(!handle);
@@ -2252,17 +2257,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
err = blks;
/*
- * If get block returns with error we simply
- * return. Later writepage will redirty the page and
- * writepages will find the dirty page again
+ * If get block returns EAGAIN or ENOSPC and there
+ * appears to be free blocks we will call
+ * ext4_writepage() for all of the pages which will
+ * just redirty the pages.
*/
if (err == -EAGAIN)
- return 0;
+ goto submit_io;
if (err == -ENOSPC &&
ext4_count_free_blocks(sb)) {
mpd->retval = err;
- return 0;
+ goto submit_io;
}
/*
@@ -2287,10 +2293,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
/* invalidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
mpd->b_size >> mpd->inode->i_blkbits);
- return err;
+ return;
}
BUG_ON(blks == 0);
+ mapp = &map;
if (map.m_flags & EXT4_MAP_NEW) {
struct block_device *bdev = mpd->inode->i_sb->s_bdev;
int i;
@@ -2299,18 +2306,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
unmap_underlying_metadata(bdev, map.m_pblk + i);
}
- /*
- * If blocks are delayed marked, we need to
- * put actual blocknr and drop delayed bit
- */
- if ((mpd->b_state & (1 << BH_Delay)) ||
- (mpd->b_state & (1 << BH_Unwritten)))
- mpage_put_bnr_to_bhs(mpd, &map);
-
if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode);
if (err)
- return err;
+ /* This only happens if the journal is aborted */
+ return;
}
/*
@@ -2321,10 +2321,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
disksize = i_size_read(mpd->inode);
if (disksize > EXT4_I(mpd->inode)->i_disksize) {
ext4_update_i_disksize(mpd->inode, disksize);
- return ext4_mark_inode_dirty(handle, mpd->inode);
+ err = ext4_mark_inode_dirty(handle, mpd->inode);
+ if (err)
+ ext4_error(mpd->inode->i_sb,
+ "Failed to mark inode %lu dirty",
+ mpd->inode->i_ino);
}
- return 0;
+submit_io:
+ mpage_da_submit_io(mpd, mapp);
+ mpd->io_done = 1;
}
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -2401,9 +2407,7 @@ flush_it:
* We couldn't merge the block to our extent, so we
* need to flush current extent and start new one
*/
- if (mpage_da_map_blocks(mpd) == 0)
- mpage_da_submit_io(mpd);
- mpd->io_done = 1;
+ mpage_da_map_and_submit(mpd);
return;
}
@@ -2422,9 +2426,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
* The function finds extents of pages and scan them for all blocks.
*/
static int __mpage_da_writepage(struct page *page,
- struct writeback_control *wbc, void *data)
+ struct writeback_control *wbc,
+ struct mpage_da_data *mpd)
{
- struct mpage_da_data *mpd = data;
struct inode *inode = mpd->inode;
struct buffer_head *bh, *head;
sector_t logical;
@@ -2435,15 +2439,13 @@ static int __mpage_da_writepage(struct page *page,
if (mpd->next_page != page->index) {
/*
* Nope, we can't. So, we map non-allocated blocks
- * and start IO on them using writepage()
+ * and start IO on them
*/
if (mpd->next_page != mpd->first_page) {
- if (mpage_da_map_blocks(mpd) == 0)
- mpage_da_submit_io(mpd);
+ mpage_da_map_and_submit(mpd);
/*
* skip rest of the page in the page_vec
*/
- mpd->io_done = 1;
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return MPAGE_DA_EXTENT_TAIL;
@@ -2622,6 +2624,7 @@ static int __ext4_journalled_writepage(struct page *page,
int ret = 0;
int err;
+ ClearPageChecked(page);
page_bufs = page_buffers(page);
BUG_ON(!page_bufs);
walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2699,7 +2702,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
static int ext4_writepage(struct page *page,
struct writeback_control *wbc)
{
- int ret = 0;
+ int ret = 0, commit_write = 0;
loff_t size;
unsigned int len;
struct buffer_head *page_bufs = NULL;
@@ -2712,71 +2715,44 @@ static int ext4_writepage(struct page *page,
else
len = PAGE_CACHE_SIZE;
- if (page_has_buffers(page)) {
- page_bufs = page_buffers(page);
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- /*
- * We don't want to do block allocation
- * So redirty the page and return
- * We may reach here when we do a journal commit
- * via journal_submit_inode_data_buffers.
- * If we don't have mapping block we just ignore
- * them. We can also reach here via shrink_page_list
- */
+ /*
+ * If the page does not have buffers (for whatever reason),
+ * try to create them using __block_write_begin. If this
+ * fails, redirty the page and move on.
+ */
+ if (!page_has_buffers(page)) {
+ if (__block_write_begin(page, 0, len,
+ noalloc_get_block_write)) {
+ redirty_page:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
- } else {
+ commit_write = 1;
+ }
+ page_bufs = page_buffers(page);
+ if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+ ext4_bh_delay_or_unwritten)) {
/*
- * The test for page_has_buffers() is subtle:
- * We know the page is dirty but it lost buffers. That means
- * that at some moment in time after write_begin()/write_end()
- * has been called all buffers have been clean and thus they
- * must have been written at least once. So they are all
- * mapped and we can happily proceed with mapping them
- * and writing the page.
- *
- * Try to initialize the buffer_heads and check whether
- * all are mapped and non delay. We don't want to
- * do block allocation here.
+ * We don't want to do block allocation, so redirty
+ * the page and return. We may reach here when we do
+ * a journal commit via journal_submit_inode_data_buffers.
+ * We can also reach here via shrink_page_list
*/
- ret = __block_write_begin(page, 0, len,
- noalloc_get_block_write);
- if (!ret) {
- page_bufs = page_buffers(page);
- /* check whether all are mapped and non delay */
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
- } else {
- /*
- * We can't do block allocation here
- * so just redity the page and unlock
- * and return
- */
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
+ goto redirty_page;
+ }
+ if (commit_write)
/* now mark the buffer_heads as dirty and uptodate */
block_commit_write(page, 0, len);
- }
- if (PageChecked(page) && ext4_should_journal_data(inode)) {
+ if (PageChecked(page) && ext4_should_journal_data(inode))
/*
* It's mmapped pagecache. Add buffers and journal it. There
* doesn't seem much point in redirtying the page here.
*/
- ClearPageChecked(page);
return __ext4_journalled_writepage(page, len);
- }
- if (page_bufs && buffer_uninit(page_bufs)) {
+ if (buffer_uninit(page_bufs)) {
ext4_set_bh_endio(page_bufs, inode);
ret = block_write_full_page_endio(page, noalloc_get_block_write,
wbc, ext4_end_io_buffer_write);
@@ -2823,25 +2799,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
*/
static int write_cache_pages_da(struct address_space *mapping,
struct writeback_control *wbc,
- struct mpage_da_data *mpd)
+ struct mpage_da_data *mpd,
+ pgoff_t *done_index)
{
int ret = 0;
int done = 0;
struct pagevec pvec;
- int nr_pages;
+ unsigned nr_pages;
pgoff_t index;
pgoff_t end; /* Inclusive */
long nr_to_write = wbc->nr_to_write;
+ int tag;
pagevec_init(&pvec, 0);
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag = PAGECACHE_TAG_TOWRITE;
+ else
+ tag = PAGECACHE_TAG_DIRTY;
+
+ *done_index = index;
while (!done && (index <= end)) {
int i;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
@@ -2861,6 +2844,8 @@ static int write_cache_pages_da(struct address_space *mapping,
break;
}
+ *done_index = page->index + 1;
+
lock_page(page);
/*
@@ -2946,6 +2931,8 @@ static int ext4_da_writepages(struct address_space *mapping,
long desired_nr_to_write, nr_to_writebump = 0;
loff_t range_start = wbc->range_start;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+ pgoff_t done_index = 0;
+ pgoff_t end;
trace_ext4_da_writepages(inode, wbc);
@@ -2981,8 +2968,11 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = LLONG_MAX;
wbc->range_cyclic = 0;
- } else
+ end = -1;
+ } else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ }
/*
* This works around two forms of stupidity. The first is in
@@ -3001,9 +2991,12 @@ static int ext4_da_writepages(struct address_space *mapping,
* sbi->max_writeback_mb_bump whichever is smaller.
*/
max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
- if (!range_cyclic && range_whole)
- desired_nr_to_write = wbc->nr_to_write * 8;
- else
+ if (!range_cyclic && range_whole) {
+ if (wbc->nr_to_write == LONG_MAX)
+ desired_nr_to_write = wbc->nr_to_write;
+ else
+ desired_nr_to_write = wbc->nr_to_write * 8;
+ } else
desired_nr_to_write = ext4_num_dirty_pages(inode, index,
max_pages);
if (desired_nr_to_write > max_pages)
@@ -3020,6 +3013,9 @@ static int ext4_da_writepages(struct address_space *mapping,
pages_skipped = wbc->pages_skipped;
retry:
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag_pages_for_writeback(mapping, index, end);
+
while (!ret && wbc->nr_to_write > 0) {
/*
@@ -3058,16 +3054,14 @@ retry:
mpd.io_done = 0;
mpd.pages_written = 0;
mpd.retval = 0;
- ret = write_cache_pages_da(mapping, wbc, &mpd);
+ ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
/*
* If we have a contiguous extent of pages and we
* haven't done the I/O yet, map the blocks and submit
* them for I/O.
*/
if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- if (mpage_da_map_blocks(&mpd) == 0)
- mpage_da_submit_io(&mpd);
- mpd.io_done = 1;
+ mpage_da_map_and_submit(&mpd);
ret = MPAGE_DA_EXTENT_TAIL;
}
trace_ext4_da_write_pages(inode, &mpd);
@@ -3114,14 +3108,13 @@ retry:
__func__, wbc->nr_to_write, ret);
/* Update index */
- index += pages_written;
wbc->range_cyclic = range_cyclic;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
/*
* set the writeback_index so that range_cyclic
* mode will write it back later
*/
- mapping->writeback_index = index;
+ mapping->writeback_index = done_index;
out_writepages:
wbc->nr_to_write -= nr_to_writebump;
@@ -3456,15 +3449,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}
-static void ext4_free_io_end(ext4_io_end_t *io)
-{
- BUG_ON(!io);
- if (io->page)
- put_page(io->page);
- iput(io->inode);
- kfree(io);
-}
-
static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh;
@@ -3641,173 +3625,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
-static void dump_completed_IO(struct inode * inode)
-{
-#ifdef EXT4_DEBUG
- struct list_head *cur, *before, *after;
- ext4_io_end_t *io, *io0, *io1;
- unsigned long flags;
-
- if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
- ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
- return;
- }
-
- ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
- list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
- cur = &io->list;
- before = cur->prev;
- io0 = container_of(before, ext4_io_end_t, list);
- after = cur->next;
- io1 = container_of(after, ext4_io_end_t, list);
-
- ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
- io, inode->i_ino, io0, io1);
- }
- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-#endif
-}
-
-/*
- * check a range of space and convert unwritten extents to written.
- */
-static int ext4_end_io_nolock(ext4_io_end_t *io)
-{
- struct inode *inode = io->inode;
- loff_t offset = io->offset;
- ssize_t size = io->size;
- int ret = 0;
-
- ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
- "list->prev 0x%p\n",
- io, inode->i_ino, io->list.next, io->list.prev);
-
- if (list_empty(&io->list))
- return ret;
-
- if (io->flag != EXT4_IO_UNWRITTEN)
- return ret;
-
- ret = ext4_convert_unwritten_extents(inode, offset, size);
- if (ret < 0) {
- printk(KERN_EMERG "%s: failed to convert unwritten"
- "extents to written extents, error is %d"
- " io is still on inode %lu aio dio list\n",
- __func__, ret, inode->i_ino);
- return ret;
- }
-
- if (io->iocb)
- aio_complete(io->iocb, io->result, 0);
- /* clear the DIO AIO unwritten flag */
- io->flag = 0;
- return ret;
-}
-
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
-{
- ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
- struct inode *inode = io->inode;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret;
-
- mutex_lock(&inode->i_mutex);
- ret = ext4_end_io_nolock(io);
- if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- return;
- }
-
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (!list_empty(&io->list))
- list_del_init(&io->list);
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- mutex_unlock(&inode->i_mutex);
- ext4_free_io_end(io);
-}
-
-/*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on workqueue but may not get immediately
- * scheduled. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents for completed IO
- * to written.
- * The function return the number of pending IOs on success.
- */
-int flush_completed_IO(struct inode *inode)
-{
- ext4_io_end_t *io;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret = 0;
- int ret2 = 0;
-
- if (list_empty(&ei->i_completed_io_list))
- return ret;
-
- dump_completed_IO(inode);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- while (!list_empty(&ei->i_completed_io_list)){
- io = list_entry(ei->i_completed_io_list.next,
- ext4_io_end_t, list);
- /*
- * Calling ext4_end_io_nolock() to convert completed
- * IO to written.
- *
- * When ext4_sync_file() is called, run_queue() may already
- * about to flush the work corresponding to this io structure.
- * It will be upset if it founds the io structure related
- * to the work-to-be schedule is freed.
- *
- * Thus we need to keep the io structure still valid here after
- * convertion finished. The io structure has a flag to
- * avoid double converting from both fsync and background work
- * queue work.
- */
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- ret = ext4_end_io_nolock(io);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (ret < 0)
- ret2 = ret;
- else
- list_del_init(&io->list);
- }
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- return (ret2 < 0) ? ret2 : 0;
-}
-
-static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
-{
- ext4_io_end_t *io = NULL;
-
- io = kmalloc(sizeof(*io), flags);
-
- if (io) {
- igrab(inode);
- io->inode = inode;
- io->flag = 0;
- io->offset = 0;
- io->size = 0;
- io->page = NULL;
- io->iocb = NULL;
- io->result = 0;
- INIT_WORK(&io->work, ext4_end_io_work);
- INIT_LIST_HEAD(&io->list);
- }
-
- return io;
-}
-
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private, int ret,
bool is_async)
@@ -3827,7 +3644,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
size);
/* if not aio dio with unwritten extents, just free io and return */
- if (io_end->flag != EXT4_IO_UNWRITTEN){
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
iocb->private = NULL;
out:
@@ -3844,14 +3661,14 @@ out:
}
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
- /* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
-
/* Add the io_end to per-inode completed aio dio list*/
ei = EXT4_I(io_end->inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+ /* queue the work to convert unwritten extents to written */
+ queue_work(wq, &io_end->work);
iocb->private = NULL;
}
@@ -3872,7 +3689,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
goto out;
}
- io_end->flag = EXT4_IO_UNWRITTEN;
+ io_end->flag = EXT4_IO_END_UNWRITTEN;
inode = io_end->inode;
/* Add the io_end to per-inode completed io list*/
@@ -5463,6 +5280,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
int error, rc = 0;
+ int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
error = inode_change_ok(inode, attr);
@@ -5518,8 +5336,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = PTR_ERR(handle);
goto err_out;
}
-
- error = ext4_orphan_add(handle, inode);
+ if (ext4_handle_valid(handle)) {
+ error = ext4_orphan_add(handle, inode);
+ orphan = 1;
+ }
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
if (!error)
@@ -5537,6 +5357,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
goto err_out;
}
ext4_orphan_del(handle, inode);
+ orphan = 0;
ext4_journal_stop(handle);
goto err_out;
}
@@ -5559,7 +5380,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
* If the call to ext4_truncate failed to get a transaction handle at
* all, we need to clean up the in-core orphan list manually.
*/
- if (inode->i_nlink)
+ if (orphan && inode->i_nlink)
ext4_orphan_del(NULL, inode);
if (!rc && (ia_valid & ATTR_MODE))
@@ -5591,9 +5412,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
* will return the blocks that include the delayed allocation
* blocks for this file.
*/
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
return 0;
@@ -5642,7 +5461,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
*
* Also account for superblock, inode, quota and xattr blocks
*/
-int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
int gdpblocks;
@@ -5830,6 +5649,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
int err, ret;
might_sleep();
+ trace_ext4_mark_inode_dirty(inode, _RET_IP_);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (ext4_handle_valid(handle) &&
EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 42f77b1dc72d..5b4d4e3a4d58 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -338,6 +338,14 @@
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep;
+
+/* We create slab caches for groupinfo data structures based on the
+ * superblock block size. There will be one cache for each unique
+ * s_blocksize_bits, shared by all mounted filesystems of that block size */
+#define NR_GRPINFO_CACHES \
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
+static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
+
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -939,6 +947,85 @@ out:
}
/*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * makes sure other parallel operations on the buddy
+ * cache don't happen while holding the buddy cache
+ * lock
+ */
+static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
+ ext4_group_t group)
+{
+ int i;
+ int block, pnum;
+ int blocks_per_page;
+ int groups_per_page;
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
+ ext4_group_t first_group;
+ struct ext4_group_info *grp;
+
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+
+ groups_per_page = blocks_per_page >> 1;
+ if (groups_per_page == 0)
+ groups_per_page = 1;
+ /* read all groups the page covers into the cache */
+ for (i = 0; i < groups_per_page; i++) {
+
+ if ((first_group + i) >= ngroups)
+ break;
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take every group's write allocation
+ * semaphore. This makes sure there is
+ * no block allocation going on in any
+ * of those groups
+ */
+ down_write_nested(&grp->alloc_sem, i);
+ }
+ return i;
+}
+
+static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+ ext4_group_t group, int locked_group)
+{
+ int i;
+ int block, pnum;
+ int blocks_per_page;
+ ext4_group_t first_group;
+ struct ext4_group_info *grp;
+
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+ /* release locks on all the groups */
+ for (i = 0; i < locked_group; i++) {
+
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* release every group's write allocation
+ * semaphore, which was taken in
+ * ext4_mb_get_buddy_cache_lock(); block
+ * allocation in those groups may resume
+ */
+ up_write(&grp->alloc_sem);
+ }
+
+}
+
+/*
* Locking note: This routine calls ext4_mb_init_cache(), which takes the
* block group lock of all groups for this page; do not hold the BG lock when
* calling this routine!
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
return 0;
}
-/*
- * lock the group_info alloc_sem of all the groups
- * belonging to the same buddy cache page. This
- * make sure other parallel operation on the buddy
- * cache doesn't happen whild holding the buddy cache
- * lock
- */
-int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
-{
- int i;
- int block, pnum;
- int blocks_per_page;
- int groups_per_page;
- ext4_group_t ngroups = ext4_get_groups_count(sb);
- ext4_group_t first_group;
- struct ext4_group_info *grp;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- first_group = pnum * blocks_per_page / 2;
-
- groups_per_page = blocks_per_page >> 1;
- if (groups_per_page == 0)
- groups_per_page = 1;
- /* read all groups the page covers into the cache */
- for (i = 0; i < groups_per_page; i++) {
-
- if ((first_group + i) >= ngroups)
- break;
- grp = ext4_get_group_info(sb, first_group + i);
- /* take all groups write allocation
- * semaphore. This make sure there is
- * no block allocation going on in any
- * of that groups
- */
- down_write_nested(&grp->alloc_sem, i);
- }
- return i;
-}
-
-void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
- ext4_group_t group, int locked_group)
-{
- int i;
- int block, pnum;
- int blocks_per_page;
- ext4_group_t first_group;
- struct ext4_group_info *grp;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- first_group = pnum * blocks_per_page / 2;
- /* release locks on all the groups */
- for (i = 0; i < locked_group; i++) {
-
- grp = ext4_get_group_info(sb, first_group + i);
- /* take all groups write allocation
- * semaphore. This make sure there is
- * no block allocation going on in any
- * of that groups
- */
- up_write(&grp->alloc_sem);
- }
-
-}
-
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
.release = seq_release,
};
+static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
+{
+ int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+ struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
+
+ BUG_ON(!cachep);
+ return cachep;
+}
/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *desc)
{
- int i, len;
+ int i;
int metalen = 0;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info **meta_group_info;
+ struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
/*
* First check if this group is the first of a reserved block.
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
meta_group_info;
}
- /*
- * calculate needed size. if change bb_counters size,
- * don't forget about ext4_mb_generate_buddy()
- */
- len = offsetof(typeof(**meta_group_info),
- bb_counters[sb->s_blocksize_bits + 2]);
-
meta_group_info =
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- meta_group_info[i] = kzalloc(len, GFP_KERNEL);
+ meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
if (meta_group_info[i] == NULL) {
printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
goto exit_group_info;
}
+ memset(meta_group_info[i], 0, kmem_cache_size(cachep));
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
&(meta_group_info[i]->bb_state));
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
int num_meta_group_infos_max;
int array_size;
struct ext4_group_desc *desc;
+ struct kmem_cache *cachep;
/* This is the number of blocks used by GDT */
num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@@ -2389,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
return 0;
err_freebuddy:
+ cachep = get_groupinfo_cache(sb->s_blocksize_bits);
while (i-- > 0)
- kfree(ext4_get_group_info(sb, i));
+ kmem_cache_free(cachep, ext4_get_group_info(sb, i));
i = num_meta_group_infos;
while (i-- > 0)
kfree(sbi->s_group_info[i]);
@@ -2407,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned offset;
unsigned max;
int ret;
+ int cache_index;
+ struct kmem_cache *cachep;
+ char *namep = NULL;
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_offsets == NULL) {
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_maxs == NULL) {
- kfree(sbi->s_mb_offsets);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+ cachep = ext4_groupinfo_caches[cache_index];
+ if (!cachep) {
+ char name[32];
+ int len = offsetof(struct ext4_group_info,
+ bb_counters[sb->s_blocksize_bits + 2]);
+
+ sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
+ namep = kstrdup(name, GFP_KERNEL);
+ if (!namep) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Need to free the kmem_cache_name() when we
+ * destroy the slab */
+ cachep = kmem_cache_create(namep, len, 0,
+ SLAB_RECLAIM_ACCOUNT, NULL);
+ if (!cachep) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ext4_groupinfo_caches[cache_index] = cachep;
}
/* order 0 is regular bitmap */
@@ -2440,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
/* init file for buddy data */
ret = ext4_mb_init_backend(sb);
if (ret != 0) {
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- return ret;
+ goto out;
}
spin_lock_init(&sbi->s_md_lock);
@@ -2457,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
for_each_possible_cpu(i) {
struct ext4_locality_group *lg;
@@ -2476,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
if (sbi->s_journal)
sbi->s_journal->j_commit_callback = release_blocks_on_commit;
- return 0;
+out:
+ if (ret) {
+ kfree(sbi->s_mb_offsets);
+ kfree(sbi->s_mb_maxs);
+ kfree(namep);
+ }
+ return ret;
}
/* need to called with the ext4 group lock held */
@@ -2504,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
int num_meta_group_infos;
struct ext4_group_info *grinfo;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
if (sbi->s_group_info) {
for (i = 0; i < ngroups; i++) {
@@ -2514,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
ext4_lock_group(sb, i);
ext4_mb_cleanup_pa(grinfo);
ext4_unlock_group(sb, i);
- kfree(grinfo);
+ kmem_cache_free(cachep, grinfo);
}
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
@@ -2558,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb)
return 0;
}
-static inline void ext4_issue_discard(struct super_block *sb,
+static inline int ext4_issue_discard(struct super_block *sb,
ext4_group_t block_group, ext4_grpblk_t block, int count)
{
int ret;
@@ -2568,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb,
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
- if (ret == EOPNOTSUPP) {
+ if (ret == -EOPNOTSUPP) {
ext4_warning(sb, "discard not supported, disabling");
clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
}
+ return ret;
}
/*
@@ -2659,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
#endif
-int __init init_ext4_mballoc(void)
+int __init ext4_init_mballoc(void)
{
- ext4_pspace_cachep =
- kmem_cache_create("ext4_prealloc_space",
- sizeof(struct ext4_prealloc_space),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_pspace_cachep == NULL)
return -ENOMEM;
- ext4_ac_cachep =
- kmem_cache_create("ext4_alloc_context",
- sizeof(struct ext4_allocation_context),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_ac_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
return -ENOMEM;
}
- ext4_free_ext_cachep =
- kmem_cache_create("ext4_free_block_extents",
- sizeof(struct ext4_free_data),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_free_ext_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
@@ -2690,8 +2732,9 @@ int __init init_ext4_mballoc(void)
return 0;
}
-void exit_ext4_mballoc(void)
+void ext4_exit_mballoc(void)
{
+ int i;
/*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache.
@@ -2700,6 +2743,15 @@ void exit_ext4_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep);
+
+ for (i = 0; i < NR_GRPINFO_CACHES; i++) {
+ struct kmem_cache *cachep = ext4_groupinfo_caches[i];
+ if (cachep) {
+ char *name = (char *)kmem_cache_name(cachep);
+ kmem_cache_destroy(cachep);
+ kfree(name);
+ }
+ }
ext4_remove_debugfs_entry();
}
@@ -3536,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
*/
static noinline_for_stack int
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
+ struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3555,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
end = bit + pa->pa_len;
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = pa->pa_inode;
- }
-
while (bit < end) {
bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
if (bit >= end)
@@ -3570,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
(unsigned) next - bit, (unsigned) group);
free += next - bit;
- if (ac) {
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = next - bit;
- ac->ac_b_ex.fe_logical = 0;
- trace_ext4_mballoc_discard(ac);
- }
-
- trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
- next - bit);
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
+ trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
+ grp_blk_start + bit, next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
@@ -3602,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
static noinline_for_stack int
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
+ struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
ext4_group_t group;
ext4_grpblk_t bit;
- trace_ext4_mb_release_group_pa(sb, ac, pa);
+ trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
-
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = NULL;
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = pa->pa_len;
- ac->ac_b_ex.fe_logical = 0;
- trace_ext4_mballoc_discard(ac);
- }
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
return 0;
}
@@ -3645,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
struct list_head list;
struct ext4_buddy e4b;
int err;
@@ -3674,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
INIT_LIST_HEAD(&list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac)
- ac->ac_sb = sb;
repeat:
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
@@ -3731,9 +3756,9 @@ repeat:
spin_unlock(pa->pa_obj_lock);
if (pa->pa_type == MB_GROUP_PA)
- ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_mb_release_group_pa(&e4b, pa);
else
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3741,8 +3766,6 @@ repeat:
out:
ext4_unlock_group(sb, group);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
ext4_mb_unload_buddy(&e4b);
put_bh(bitmap_bh);
return free;
@@ -3763,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
struct super_block *sb = inode->i_sb;
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
ext4_group_t group = 0;
struct list_head list;
struct ext4_buddy e4b;
@@ -3779,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
INIT_LIST_HEAD(&list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = inode;
- }
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
@@ -3853,7 +3870,7 @@ repeat:
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
@@ -3862,8 +3879,6 @@ repeat:
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
}
/*
@@ -4061,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
struct ext4_buddy e4b;
struct list_head discard_list;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
mb_debug(1, "discard locality group preallocation\n");
INIT_LIST_HEAD(&discard_list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac)
- ac->ac_sb = sb;
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4120,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
}
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_mb_release_group_pa(&e4b, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
}
/*
@@ -4492,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
- struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
unsigned long freed = 0;
unsigned int overflow;
@@ -4532,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!bh)
tbh = sb_find_get_block(inode->i_sb,
block + i);
+ if (unlikely(!tbh))
+ continue;
ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
inode, tbh, block + i);
}
@@ -4547,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!ext4_should_writeback_data(inode))
flags |= EXT4_FREE_BLOCKS_METADATA;
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac) {
- ac->ac_inode = inode;
- ac->ac_sb = sb;
- }
-
do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4610,12 +4614,7 @@ do_more:
BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
}
#endif
- if (ac) {
- ac->ac_b_ex.fe_group = block_group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = count;
- trace_ext4_mballoc_free(ac);
- }
+ trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
err = ext4_mb_load_buddy(sb, block_group, &e4b);
if (err)
@@ -4645,8 +4644,6 @@ do_more:
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
ext4_mb_return_to_preallocation(inode, &e4b, block, count);
- if (test_opt(sb, DISCARD))
- ext4_issue_discard(sb, block_group, bit, count);
}
ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4686,7 +4683,190 @@ error_return:
dquot_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
return;
}
+
+/**
+ * ext4_trim_extent -- function to TRIM one single free extent in the group
+ * @sb: super block for the file system
+ * @start: starting block of the free extent in the alloc. group
+ * @count: number of blocks to TRIM
+ * @group: alloc. group we are working with
+ * @e4b: ext4 buddy for the group
+ *
+ * Trim "count" blocks starting at "start" in the "group". To ensure that no
+ * one will allocate those blocks, mark them as used in the buddy bitmap. This
+ * must be called with the group lock held.
+ */
+static int ext4_trim_extent(struct super_block *sb, int start, int count,
+ ext4_group_t group, struct ext4_buddy *e4b)
+{
+ struct ext4_free_extent ex;
+ int ret = 0;
+
+ assert_spin_locked(ext4_group_lock_ptr(sb, group));
+
+ ex.fe_start = start;
+ ex.fe_group = group;
+ ex.fe_len = count;
+
+ /*
+ * Mark blocks used, so no one can reuse them while
+ * being trimmed.
+ */
+ mb_mark_used(e4b, &ex);
+ ext4_unlock_group(sb, group);
+
+ ret = ext4_issue_discard(sb, group, start, count);
+ if (ret)
+ ext4_std_error(sb, ret);
+
+ ext4_lock_group(sb, group);
+ mb_free_blocks(NULL, e4b, start, ex.fe_len);
+ return ret;
+}
+
+/**
+ * ext4_trim_all_free -- function to trim all free space in alloc. group
+ * @sb: super block for file system
+ * @e4b: ext4 buddy
+ * @start: first group block to examine
+ * @max: last group block to examine
+ * @minblocks: minimum extent block count
+ *
+ * ext4_trim_all_free walks through the group's buddy bitmap searching for
+ * free extents. When a free extent is found, ext4_trim_extent is called to
+ * TRIM the extent.
+ *
+ * ext4_trim_all_free walks through the group's block bitmap searching for
+ * free extents. When a free extent is found, it is marked as used in the
+ * group buddy bitmap. Then a TRIM command is issued on this extent and the
+ * extent is freed in the group buddy bitmap. This is done until the whole
+ * group is scanned.
+ */
+ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+ ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
+{
+ void *bitmap;
+ ext4_grpblk_t next, count = 0;
+ ext4_group_t group;
+ int ret = 0;
+
+ BUG_ON(e4b == NULL);
+
+ bitmap = e4b->bd_bitmap;
+ group = e4b->bd_group;
+ start = (e4b->bd_info->bb_first_free > start) ?
+ e4b->bd_info->bb_first_free : start;
+ ext4_lock_group(sb, group);
+
+ while (start < max) {
+ start = mb_find_next_zero_bit(bitmap, max, start);
+ if (start >= max)
+ break;
+ next = mb_find_next_bit(bitmap, max, start);
+
+ if ((next - start) >= minblocks) {
+ ret = ext4_trim_extent(sb, start,
+ next - start, group, e4b);
+ if (ret < 0)
+ break;
+ count += next - start;
+ }
+ start = next + 1;
+
+ if (fatal_signal_pending(current)) {
+ count = -ERESTARTSYS;
+ break;
+ }
+
+ if (need_resched()) {
+ ext4_unlock_group(sb, group);
+ cond_resched();
+ ext4_lock_group(sb, group);
+ }
+
+ if ((e4b->bd_info->bb_free - count) < minblocks)
+ break;
+ }
+ ext4_unlock_group(sb, group);
+
+ ext4_debug("trimmed %d blocks in the group %d\n",
+ count, group);
+
+ if (ret < 0)
+ count = ret;
+
+ return count;
+}
+
+/**
+ * ext4_trim_fs() -- trim ioctl handler function
+ * @sb: superblock for filesystem
+ * @range: fstrim_range structure
+ *
+ * start: First Byte to trim
+ * len: number of Bytes to trim from start
+ * minlen: minimum extent length in Bytes
+ * ext4_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such group, the ext4_trim_all_free function
+ * is invoked to trim all free space.
+ */
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ struct ext4_buddy e4b;
+ ext4_group_t first_group, last_group;
+ ext4_group_t group, ngroups = ext4_get_groups_count(sb);
+ ext4_grpblk_t cnt = 0, first_block, last_block;
+ uint64_t start, len, minlen, trimmed;
+ int ret = 0;
+
+ start = range->start >> sb->s_blocksize_bits;
+ len = range->len >> sb->s_blocksize_bits;
+ minlen = range->minlen >> sb->s_blocksize_bits;
+ trimmed = 0;
+
+ if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+ return -EINVAL;
+
+ /* Determine first and last group to examine based on start and len */
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
+ &first_group, &first_block);
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
+ &last_group, &last_block);
+ last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+ last_block = EXT4_BLOCKS_PER_GROUP(sb);
+
+ if (first_group > last_group)
+ return -EINVAL;
+
+ for (group = first_group; group <= last_group; group++) {
+ ret = ext4_mb_load_buddy(sb, group, &e4b);
+ if (ret) {
+ ext4_error(sb, "Error in loading buddy "
+ "information for %u", group);
+ break;
+ }
+
+ if (len >= EXT4_BLOCKS_PER_GROUP(sb))
+ len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
+ else
+ last_block = len;
+
+ if (e4b.bd_info->bb_free >= minlen) {
+ cnt = ext4_trim_all_free(sb, &e4b, first_block,
+ last_block, minlen);
+ if (cnt < 0) {
+ ret = cnt;
+ ext4_mb_unload_buddy(&e4b);
+ break;
+ }
+ }
+ ext4_mb_unload_buddy(&e4b);
+ trimmed += cnt;
+ first_block = 0;
+ }
+ range->len = trimmed * sb->s_blocksize;
+
+ return ret;
+}
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 1765c2c50a9b..25f3a974b725 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
struct buffer_head *bh;
struct ext4_extent_header *eh;
- block = idx_pblock(ix);
+ block = ext4_idx_pblock(ix);
bh = sb_bread(inode->i_sb, block);
if (!bh)
return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 5f1ed9fc913c..b9f3e7862f13 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
/* leaf block */
*extent = ++path[ppos].p_ext;
- path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+ path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
return 0;
}
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
/* index block */
path[ppos].p_idx++;
- path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+ path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
if (path[ppos+1].p_bh)
brelse(path[ppos+1].p_bh);
path[ppos+1].p_bh =
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
path[cur_ppos].p_idx =
EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
path[cur_ppos].p_block =
- idx_pblock(path[cur_ppos].p_idx);
+ ext4_idx_pblock(path[cur_ppos].p_idx);
if (path[cur_ppos+1].p_bh)
brelse(path[cur_ppos+1].p_bh);
path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
path[leaf_ppos].p_ext = *extent =
EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
path[leaf_ppos].p_block =
- ext_pblock(path[leaf_ppos].p_ext);
+ ext4_ext_pblock(path[leaf_ppos].p_ext);
return 0;
}
}
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+ ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
}
o_start->ee_len = start_ext->ee_len;
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
- ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+ ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
/*
* Set 0 to the extent block if new_ext was
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
/* Insert new entry */
if (new_ext->ee_len) {
o_start[i] = *new_ext;
- ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
+ ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
}
/* Insert end entry */
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
start_ext.ee_len = end_ext.ee_len = 0;
new_ext.ee_block = cpu_to_le32(*from);
- ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
+ ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
new_ext.ee_len = dext->ee_len;
new_ext_alen = ext4_ext_get_actual_len(&new_ext);
new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
copy_extent_status(oext, &end_ext);
end_ext_alen = ext4_ext_get_actual_len(&end_ext);
ext4_ext_store_pblock(&end_ext,
- (ext_pblock(o_end) + oext_alen - end_ext_alen));
+ (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
end_ext.ee_block =
cpu_to_le32(le32_to_cpu(o_end->ee_block) +
oext_alen - end_ext_alen);
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
/* When tmp_dext is too large, pick up the target range. */
diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
- ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
+ ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
tmp_dext->ee_block =
cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
tmp_dext->ee_len = cpu_to_le16(max_count);
orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
- ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
+ ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
/* Adjust extent length if donor extent is larger than orig */
if (ext4_ext_get_actual_len(tmp_dext) >
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index bd39885b5998..92203b8a099f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
ext4_lblk_t start, block, b;
+ const u8 *name = d_name->name;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
@@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
namelen = d_name->len;
if (namelen > EXT4_NAME_LEN)
return NULL;
+	/*
+	 * Match exactly "." (second byte is the NUL terminator) or "..".
+	 * Comparing against the digit '0' would miss "." and wrongly
+	 * restrict a lookup of ".0" to the first block.
+	 */
+	if ((namelen <= 2) && (name[0] == '.') &&
+	    (name[1] == '.' || name[1] == '\0')) {
+		/*
+		 * "." or ".." will only be in the first block
+		 * NFS may look up ".."; "." should be handled by the VFS
+		 */
+		block = start = 0;
+		nblocks = 1;
+		goto restart;
+	}
if (is_dx(dir)) {
bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
/*
@@ -960,55 +971,35 @@ cleanup_and_exit:
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir, int *err)
{
- struct super_block * sb;
+ struct super_block * sb = dir->i_sb;
struct dx_hash_info hinfo;
- u32 hash;
struct dx_frame frames[2], *frame;
- struct ext4_dir_entry_2 *de, *top;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
- int namelen = d_name->len;
- const u8 *name = d_name->name;
- sb = dir->i_sb;
- /* NFS may look up ".." - look at dx_root directory block */
- if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
- if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
- return NULL;
- } else {
- frame = frames;
- frame->bh = NULL; /* for dx_release() */
- frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
- dx_set_block(frame->at, 0); /* dx_root block is 0 */
- }
- hash = hinfo.hash;
+ if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
+ return NULL;
do {
block = dx_get_block(frame->at);
- if (!(bh = ext4_bread (NULL,dir, block, 0, err)))
+ if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
goto errout;
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
- EXT4_DIR_REC_LEN(0));
- for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
- int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
- + ((char *) de - bh->b_data);
-
- if (!ext4_check_dir_entry(dir, de, bh, off)) {
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto errout;
- }
- if (ext4_match(namelen, name, de)) {
- *res_dir = de;
- dx_release(frames);
- return bh;
- }
+ retval = search_dirblock(bh, dir, d_name,
+ block << EXT4_BLOCK_SIZE_BITS(sb),
+ res_dir);
+ if (retval == 1) { /* Success! */
+ dx_release(frames);
+ return bh;
}
brelse(bh);
+ if (retval == -1) {
+ *err = ERR_BAD_DX_DIR;
+ goto errout;
+ }
+
/* Check to see if we should continue to search */
- retval = ext4_htree_next_block(dir, hash, frame,
+ retval = ext4_htree_next_block(dir, hinfo.hash, frame,
frames, NULL);
if (retval < 0) {
ext4_warning(sb,
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
new file mode 100644
index 000000000000..7f5451cd1d38
--- /dev/null
+++ b/fs/ext4/page-io.c
@@ -0,0 +1,431 @@
+/*
+ * linux/fs/ext4/page-io.c
+ *
+ * This contains the new page_io functions for ext4
+ *
+ * Written by Theodore Ts'o, 2010.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/jbd2.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include <linux/mpage.h>
+#include <linux/namei.h>
+#include <linux/uio.h>
+#include <linux/bio.h>
+#include <linux/workqueue.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "ext4_jbd2.h"
+#include "xattr.h"
+#include "acl.h"
+#include "ext4_extents.h"
+
+/* Slab caches; created in ext4_init_pageio() for ext4_io_page and ext4_io_end. */
+static struct kmem_cache *io_page_cachep, *io_end_cachep;
+
+/*
+ * Table of WQ_HASH_SZ wait queue heads.  to_ioend_wq() picks one by taking
+ * the pointer value modulo the table size.
+ */
+#define WQ_HASH_SZ 37
+#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
+static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
+
+/*
+ * Create the slab caches and initialise the hashed wait queues used by the
+ * page I/O code.
+ *
+ * Returns 0 on success, or -ENOMEM if either cache cannot be created; in
+ * that case no cache is left behind.
+ */
+int __init ext4_init_pageio(void)
+{
+	int i;
+
+	io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
+	if (io_page_cachep == NULL)
+		return -ENOMEM;
+	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
+	/* Test the cache just created, not the first one a second time. */
+	if (io_end_cachep == NULL) {
+		kmem_cache_destroy(io_page_cachep);
+		return -ENOMEM;
+	}
+	for (i = 0; i < WQ_HASH_SZ; i++)
+		init_waitqueue_head(&ioend_wq[i]);
+
+	return 0;
+}
+
+/* Destroy the io_end and io_page slab caches. */
+void ext4_exit_pageio(void)
+{
+ kmem_cache_destroy(io_end_cachep);
+ kmem_cache_destroy(io_page_cachep);
+}
+
+/* Sleep until the inode's i_ioend_count reads zero. */
+void ext4_ioend_wait(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	wait_queue_head_t *waitq = to_ioend_wq(inode);
+
+	wait_event(*waitq, atomic_read(&ei->i_ioend_count) == 0);
+}
+
+/*
+ * Drop one reference on an ext4_io_page.  When the count reaches zero the
+ * page's writeback is ended, the page reference is released and the
+ * structure is returned to its slab cache.
+ */
+static void put_io_page(struct ext4_io_page *io_page)
+{
+	if (!atomic_dec_and_test(&io_page->p_count))
+		return;
+
+	end_page_writeback(io_page->p_page);
+	put_page(io_page->p_page);
+	kmem_cache_free(io_page_cachep, io_page);
+}
+
+/*
+ * Release an io_end: drop its page references, decrement the owning
+ * inode's i_ioend_count (waking any waiters when it hits zero) and free
+ * the structure.
+ */
+void ext4_free_io_end(ext4_io_end_t *io)
+{
+	struct inode *inode;
+	wait_queue_head_t *waitq;
+	int idx;
+
+	BUG_ON(!io);
+	if (io->page)
+		put_page(io->page);
+	for (idx = 0; idx < io->num_io_pages; idx++)
+		put_io_page(io->pages[idx]);
+	io->num_io_pages = 0;
+
+	inode = io->inode;
+	waitq = to_ioend_wq(inode);
+	if (atomic_dec_and_test(&EXT4_I(inode)->i_ioend_count) &&
+	    waitqueue_active(waitq))
+		wake_up_all(waitq);
+	kmem_cache_free(io_end_cachep, io);
+}
+
+/*
+ * Convert the unwritten extents covered by an io_end to written.
+ *
+ * Nothing is done (and 0 is returned) when the io_end is not on a list or
+ * does not carry EXT4_IO_END_UNWRITTEN.  On conversion failure the negative
+ * error is returned and the flag is left set.
+ */
+int ext4_end_io_nolock(ext4_io_end_t *io)
+{
+	struct inode *inode = io->inode;
+	loff_t start = io->offset;
+	ssize_t nbytes = io->size;
+	int err;
+
+	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
+		   "list->prev 0x%p\n",
+		   io, inode->i_ino, io->list.next, io->list.prev);
+
+	if (list_empty(&io->list) || !(io->flag & EXT4_IO_END_UNWRITTEN))
+		return 0;
+
+	err = ext4_convert_unwritten_extents(inode, start, nbytes);
+	if (err < 0) {
+		printk(KERN_EMERG "%s: failed to convert unwritten "
+		       "extents to written extents, error is %d "
+		       "io is still on inode %lu aio dio list\n",
+		       __func__, err, inode->i_ino);
+		return err;
+	}
+
+	if (io->iocb)
+		aio_complete(io->iocb, io->result, 0);
+	/* clear the DIO AIO unwritten flag */
+	io->flag &= ~EXT4_IO_END_UNWRITTEN;
+	return err;
+}
+
+/*
+ * Workqueue handler for a completed io_end: convert its unwritten extents
+ * under i_mutex, unlink it from the inode's list and free it.  If the
+ * conversion fails the io_end is left untouched on its list.
+ */
+static void ext4_end_io_work(struct work_struct *work)
+{
+	ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
+	struct inode *inode = io->inode;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned long irqflags;
+
+	mutex_lock(&inode->i_mutex);
+	if (ext4_end_io_nolock(io) < 0) {
+		mutex_unlock(&inode->i_mutex);
+		return;
+	}
+
+	spin_lock_irqsave(&ei->i_completed_io_lock, irqflags);
+	if (!list_empty(&io->list))
+		list_del_init(&io->list);
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, irqflags);
+	mutex_unlock(&inode->i_mutex);
+
+	ext4_free_io_end(io);
+}
+
+/*
+ * Allocate and zero a new io_end for @inode, bumping the inode's
+ * i_ioend_count.  Returns NULL if the allocation fails.
+ */
+ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
+{
+	ext4_io_end_t *io_end = kmem_cache_alloc(io_end_cachep, flags);
+
+	if (!io_end)
+		return NULL;
+
+	memset(io_end, 0, sizeof(*io_end));
+	atomic_inc(&EXT4_I(inode)->i_ioend_count);
+	io_end->inode = inode;
+	INIT_WORK(&io_end->work, ext4_end_io_work);
+	INIT_LIST_HEAD(&io_end->list);
+	return io_end;
+}
+
+/*
+ * Print a buffer I/O error in the same format as fs/buffer.c.  This
+ * keeps dmesg scrapers that look for that specific message working.
+ * We really need a unified error reporting structure to userspace ala
+ * Digital Unix's uerf system, but it's probably not going to happen in
+ * my lifetime, due to LKML politics...
+ */
+static void buffer_io_error(struct buffer_head *bh)
+{
+	char devname[BDEVNAME_SIZE];
+	unsigned long long blocknr = (unsigned long long)bh->b_blocknr;
+
+	printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
+	       bdevname(bh->b_bdev, devname), blocknr);
+}
+
+/*
+ * bio completion handler for ext4 page writeback.
+ *
+ * Clears the dirty state of the buffers covered by the io_end, drops the
+ * per-page references held by the io_end, records any I/O error, then
+ * adds the io_end to the inode's completed-I/O list and queues its work
+ * item on the dio_unwritten workqueue.
+ */
+static void ext4_end_bio(struct bio *bio, int error)
+{
+	ext4_io_end_t *io_end = bio->bi_private;
+	struct workqueue_struct *wq;
+	struct inode *inode;
+	unsigned long flags;
+	/* Saved up front: the bio must not be touched after bio_put(). */
+	sector_t bi_sector = bio->bi_sector;
+	int i;
+
+	BUG_ON(!io_end);
+	bio->bi_private = NULL;
+	bio->bi_end_io = NULL;
+	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+		error = 0;
+	bio_put(bio);
+
+	for (i = 0; i < io_end->num_io_pages; i++) {
+		struct page *page = io_end->pages[i]->p_page;
+		struct buffer_head *bh, *head;
+		int partial_write = 0;
+
+		head = page_buffers(page);
+		if (error)
+			SetPageError(page);
+		BUG_ON(!head);
+		if (head->b_size == PAGE_CACHE_SIZE)
+			clear_buffer_dirty(head);
+		else {
+			loff_t offset;
+			loff_t io_end_offset = io_end->offset + io_end->size;
+
+			offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
+			bh = head;
+			do {
+				/* Buffer lies entirely inside this io_end. */
+				if ((offset >= io_end->offset) &&
+				    (offset+bh->b_size <= io_end_offset)) {
+					if (error)
+						buffer_io_error(bh);
+
+					clear_buffer_dirty(bh);
+				}
+				if (buffer_delay(bh))
+					partial_write = 1;
+				else if (!buffer_mapped(bh))
+					clear_buffer_dirty(bh);
+				else if (buffer_dirty(bh))
+					partial_write = 1;
+				offset += bh->b_size;
+				bh = bh->b_this_page;
+			} while (bh != head);
+		}
+
+		/*
+		 * If this is a partial write which happened to make
+		 * all buffers uptodate then we can optimize away a
+		 * bogus readpage() for the next read(). Here we
+		 * 'discover' whether the page went uptodate as a
+		 * result of this (potentially partial) write.
+		 *
+		 * This must happen before put_io_page(), which may drop
+		 * the reference that keeps the page alive.
+		 */
+		if (!partial_write)
+			SetPageUptodate(page);
+
+		put_io_page(io_end->pages[i]);
+	}
+	io_end->num_io_pages = 0;
+	inode = io_end->inode;
+
+	if (error) {
+		io_end->flag |= EXT4_IO_END_ERROR;
+		ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+			     "(offset %llu size %ld starting block %llu)",
+			     inode->i_ino,
+			     (unsigned long long) io_end->offset,
+			     (long) io_end->size,
+			     (unsigned long long)
+			     bi_sector >> (inode->i_blkbits - 9));
+	}
+
+	/* Add the io_end to per-inode completed io list*/
+	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
+	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
+
+	wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
+	/* queue the work to convert unwritten extents to written */
+	queue_work(wq, &io_end->work);
+}
+
+/*
+ * Submit the bio currently being built in @io, if there is one, and reset
+ * @io so that a fresh bio is started by the next buffer.
+ */
+void ext4_io_submit(struct ext4_io_submit *io)
+{
+	struct bio *bio = io->io_bio;
+
+	if (bio != NULL) {
+		/* Hold an extra reference across the submission. */
+		bio_get(bio);
+		submit_bio(io->io_op, bio);
+		BUG_ON(bio_flagged(bio, BIO_EOPNOTSUPP));
+		bio_put(bio);
+	}
+	io->io_bio = NULL;
+	io->io_op = 0;
+	io->io_end = NULL;
+}
+
+/*
+ * Start a new bio and io_end in @io for the buffer @bh.
+ *
+ * Returns 0 on success or -ENOMEM if the io_end cannot be allocated.
+ */
+static int io_submit_init(struct ext4_io_submit *io,
+			  struct inode *inode,
+			  struct writeback_control *wbc,
+			  struct buffer_head *bh)
+{
+	ext4_io_end_t *io_end;
+	struct page *page = bh->b_page;
+	int nvecs = bio_get_nr_vecs(bh->b_bdev);
+	struct bio *bio;
+
+	io_end = ext4_init_io_end(inode, GFP_NOFS);
+	if (!io_end)
+		return -ENOMEM;
+	/* Halve the requested vector count after every failed attempt. */
+	do {
+		bio = bio_alloc(GFP_NOIO, nvecs);
+		nvecs >>= 1;
+	} while (bio == NULL);
+
+	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_bdev = bh->b_bdev;
+	bio->bi_private = io->io_end = io_end;
+	bio->bi_end_io = ext4_end_bio;
+
+	/*
+	 * Widen the page index before shifting; done at the width of
+	 * page->index the byte offset can overflow on 32-bit.
+	 */
+	io_end->offset = ((loff_t) page->index << PAGE_CACHE_SHIFT) +
+		bh_offset(bh);
+
+	io->io_bio = bio;
+	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
+			WRITE_SYNC_PLUG : WRITE);
+	io->io_next_block = bh->b_blocknr;
+	return 0;
+}
+
+/*
+ * Add the buffer @bh, belonging to @io_page, to the bio being built in @io.
+ *
+ * Unmapped or delayed buffers are not written; they only flush whatever
+ * bio is pending.  A bio is submitted and a new one started whenever the
+ * buffer is not physically contiguous with the previous one, the io_end
+ * has no room for another page, or the bio refuses the buffer.
+ *
+ * Returns 0 on success or the error from io_submit_init().
+ */
+static int io_submit_add_bh(struct ext4_io_submit *io,
+			    struct ext4_io_page *io_page,
+			    struct inode *inode,
+			    struct writeback_control *wbc,
+			    struct buffer_head *bh)
+{
+	ext4_io_end_t *io_end;
+	int ret;
+
+	if (buffer_new(bh)) {
+		clear_buffer_new(bh);
+		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+	}
+
+	if (!buffer_mapped(bh) || buffer_delay(bh)) {
+		if (!buffer_mapped(bh))
+			clear_buffer_dirty(bh);
+		if (io->io_bio)
+			ext4_io_submit(io);
+		return 0;
+	}
+
+	if (io->io_bio && bh->b_blocknr != io->io_next_block) {
+submit_and_retry:
+		ext4_io_submit(io);
+	}
+	if (io->io_bio == NULL) {
+		ret = io_submit_init(io, inode, wbc, bh);
+		if (ret)
+			return ret;
+	}
+	io_end = io->io_end;
+	if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
+	    (io_end->pages[io_end->num_io_pages-1] != io_page))
+		goto submit_and_retry;
+	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+	if (ret != bh->b_size)
+		goto submit_and_retry;
+	/*
+	 * Account for the buffer only once it is really part of the bio.
+	 * Doing it earlier would inflate the size (and possibly set the
+	 * unwritten flag) of an io_end that is then submitted without
+	 * this buffer.
+	 */
+	if (buffer_uninit(bh))
+		io_end->flag |= EXT4_IO_END_UNWRITTEN;
+	io_end->size += bh->b_size;
+	io->io_next_block++;
+	if ((io_end->num_io_pages == 0) ||
+	    (io_end->pages[io_end->num_io_pages-1] != io_page)) {
+		io_end->pages[io_end->num_io_pages++] = io_page;
+		atomic_inc(&io_page->p_count);
+	}
+	return 0;
+}
+
+/*
+ * Queue the first @len bytes of @page for writeback through @io.
+ *
+ * Buffers starting at or beyond @len are marked clean and uptodate instead
+ * of being written.  The page is unlocked before returning.
+ *
+ * Returns 0 on success or -ENOMEM; on failure the page is redirtied so
+ * that it is written again later.
+ */
+int ext4_bio_write_page(struct ext4_io_submit *io,
+			struct page *page,
+			int len,
+			struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	unsigned block_start, block_end, blocksize;
+	struct ext4_io_page *io_page;
+	struct buffer_head *bh, *head;
+	int ret = 0;
+
+	blocksize = 1 << inode->i_blkbits;
+
+	BUG_ON(PageWriteback(page));
+
+	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
+	if (!io_page) {
+		set_page_dirty(page);
+		unlock_page(page);
+		return -ENOMEM;
+	}
+	io_page->p_page = page;
+	atomic_set(&io_page->p_count, 1);
+	get_page(page);
+	/*
+	 * Mark the page under writeback only once the allocation has
+	 * succeeded: the failure path above has nothing that would end
+	 * writeback again.
+	 */
+	set_page_writeback(page);
+	ClearPageError(page);
+
+	for (bh = head = page_buffers(page), block_start = 0;
+	     bh != head || !block_start;
+	     block_start = block_end, bh = bh->b_this_page) {
+		block_end = block_start + blocksize;
+		if (block_start >= len) {
+			clear_buffer_dirty(bh);
+			set_buffer_uptodate(bh);
+			continue;
+		}
+		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
+		if (ret) {
+			/*
+			 * We only get here on ENOMEM.  Not much else
+			 * we can do but mark the page as dirty, and
+			 * better luck next time.
+			 */
+			set_page_dirty(page);
+			break;
+		}
+	}
+	unlock_page(page);
+	/*
+	 * If the page was truncated before we could do the writeback,
+	 * or we had a memory allocation error while trying to write
+	 * the first buffer head, we won't have submitted any pages for
+	 * I/O.  In that case we need to make sure we've cleared the
+	 * PageWriteback bit from the page to prevent the system from
+	 * wedging later on.
+	 */
+	put_io_page(io_page);
+	return ret;
+}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ca5c8aa00a2f..dc963929de65 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb,
}
/* Zero out all of the reserved backup group descriptor table blocks */
- for (i = 0, bit = gdblocks + 1, block = start + bit;
- i < reserved_gdb; i++, block++, bit++) {
- struct buffer_head *gdb;
-
- ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
-
- if ((err = extend_or_restart_transaction(handle, 1, bh)))
- goto exit_bh;
+ ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ block, sbi->s_itb_per_group);
+ err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
+ GFP_NOFS);
+ if (err)
+ goto exit_bh;
- if (IS_ERR(gdb = bclean(handle, sb, block))) {
- err = PTR_ERR(gdb);
- goto exit_bh;
- }
- ext4_handle_dirty_metadata(handle, NULL, gdb);
- ext4_set_bit(bit, bh->b_data);
- brelse(gdb);
- }
ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
input->block_bitmap - start);
ext4_set_bit(input->block_bitmap - start, bh->b_data);
@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_set_bit(input->inode_bitmap - start, bh->b_data);
/* Zero out all of the inode table blocks */
- for (i = 0, block = input->inode_table, bit = block - start;
- i < sbi->s_itb_per_group; i++, bit++, block++) {
- struct buffer_head *it;
-
- ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
-
- if ((err = extend_or_restart_transaction(handle, 1, bh)))
- goto exit_bh;
-
- if (IS_ERR(it = bclean(handle, sb, block))) {
- err = PTR_ERR(it);
- goto exit_bh;
- }
- ext4_handle_dirty_metadata(handle, NULL, it);
- brelse(it);
- ext4_set_bit(bit, bh->b_data);
- }
+ block = input->inode_table;
+ ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ block, sbi->s_itb_per_group);
+ err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
+ if (err)
+ goto exit_bh;
if ((err = extend_or_restart_transaction(handle, 2, bh)))
goto exit_bh;
- mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
+ ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
+ bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
/* Mark unused entries in inode bitmap used */
@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb,
goto exit_journal;
}
- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
- bh->b_data);
+ ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+ bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
exit_bh:
brelse(bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8ecc1e590303..61182fe6254e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -40,6 +40,9 @@
#include <linux/crc16.h>
#include <asm/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -49,8 +52,11 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
-struct proc_dir_entry *ext4_proc_root;
+static struct proc_dir_entry *ext4_proc_root;
static struct kset *ext4_kset;
+struct ext4_lazy_init *ext4_li_info;
+struct mutex ext4_li_mtx;
+struct ext4_features *ext4_feat;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@@ -67,14 +73,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt);
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data);
+static void ext4_destroy_lazyinit_thread(void);
+static void ext4_unregister_li_request(struct super_block *sb);
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
.owner = THIS_MODULE,
.name = "ext3",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -701,6 +709,7 @@ static void ext4_put_super(struct super_block *sb)
struct ext4_super_block *es = sbi->s_es;
int i, err;
+ ext4_unregister_li_request(sb);
dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
flush_workqueue(sbi->dio_unwritten_wq);
@@ -717,6 +726,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_abort(sb, "Couldn't clean up the journal");
}
+ del_timer(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
@@ -818,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->cur_aio_dio = NULL;
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
+ atomic_set(&ei->i_ioend_count, 0);
return &ei->vfs_inode;
}
+/* Trace the generic drop decision for @inode and pass it through. */
+static int ext4_drop_inode(struct inode *inode)
+{
+	int drop;
+
+	drop = generic_drop_inode(inode);
+	trace_ext4_drop_inode(inode, drop);
+
+	return drop;
+}
+
static void ext4_destroy_inode(struct inode *inode)
{
+ ext4_ioend_wait(inode);
if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
ext4_msg(inode->i_sb, KERN_ERR,
"Inode %lu (%p): orphan list check failed!",
@@ -1042,6 +1062,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
!(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
seq_puts(seq, ",block_validity");
+	/*
+	 * Print the option names the mount option parser accepts
+	 * ("init_itable" / "noinit_itable") so that the displayed
+	 * options can be fed back to mount.
+	 */
+	if (!test_opt(sb, INIT_INODE_TABLE))
+		seq_puts(seq, ",noinit_itable");
+	else if (sbi->s_li_wait_mult)
+		seq_printf(seq, ",init_itable=%u",
+			   (unsigned) sbi->s_li_wait_mult);
+
ext4_show_quota_options(seq, sb);
return 0;
@@ -1157,6 +1183,7 @@ static const struct super_operations ext4_sops = {
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
+ .drop_inode = ext4_drop_inode,
.evict_inode = ext4_evict_inode,
.put_super = ext4_put_super,
.sync_fs = ext4_sync_fs,
@@ -1170,6 +1197,7 @@ static const struct super_operations ext4_sops = {
.quota_write = ext4_quota_write,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
+ .trim_fs = ext4_trim_fs
};
static const struct super_operations ext4_nojournal_sops = {
@@ -1177,6 +1205,7 @@ static const struct super_operations ext4_nojournal_sops = {
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
+ .drop_inode = ext4_drop_inode,
.evict_inode = ext4_evict_inode,
.write_super = ext4_write_super,
.put_super = ext4_put_super,
@@ -1216,6 +1245,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard,
+ Opt_init_inode_table, Opt_noinit_inode_table,
};
static const match_table_t tokens = {
@@ -1286,6 +1316,9 @@ static const match_table_t tokens = {
{Opt_dioread_lock, "dioread_lock"},
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
+ {Opt_init_inode_table, "init_itable=%u"},
+ {Opt_init_inode_table, "init_itable"},
+ {Opt_noinit_inode_table, "noinit_itable"},
{Opt_err, NULL},
};
@@ -1756,6 +1789,20 @@ set_qf_format:
case Opt_dioread_lock:
clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
break;
+ case Opt_init_inode_table:
+ set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ if (args[0].from) {
+ if (match_int(&args[0], &option))
+ return 0;
+ } else
+ option = EXT4_DEF_LI_WAIT_MULT;
+ if (option < 0)
+ return 0;
+ sbi->s_li_wait_mult = option;
+ break;
+ case Opt_noinit_inode_table:
+ clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ break;
default:
ext4_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" "
@@ -1939,7 +1986,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
}
/* Called at mount-time, super-block is locked */
-static int ext4_check_descriptors(struct super_block *sb)
+static int ext4_check_descriptors(struct super_block *sb,
+ ext4_group_t *first_not_zeroed)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
@@ -1948,7 +1996,7 @@ static int ext4_check_descriptors(struct super_block *sb)
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
int flexbg_flag = 0;
- ext4_group_t i;
+ ext4_group_t i, grp = sbi->s_groups_count;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
flexbg_flag = 1;
@@ -1964,6 +2012,10 @@ static int ext4_check_descriptors(struct super_block *sb)
last_block = first_block +
(EXT4_BLOCKS_PER_GROUP(sb) - 1);
+ if ((grp == sbi->s_groups_count) &&
+ !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ grp = i;
+
block_bitmap = ext4_block_bitmap(sb, gdp);
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2001,6 +2053,8 @@ static int ext4_check_descriptors(struct super_block *sb)
if (!flexbg_flag)
first_block += EXT4_BLOCKS_PER_GROUP(sb);
}
+ if (NULL != first_not_zeroed)
+ *first_not_zeroed = grp;
ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
@@ -2373,6 +2427,7 @@ static struct ext4_attr ext4_attr_##_name = { \
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
+#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
#define EXT4_RW_ATTR_SBI_UI(name, elname) \
@@ -2409,6 +2464,16 @@ static struct attribute *ext4_attrs[] = {
NULL,
};
+/* Features this copy of ext4 supports */
+EXT4_INFO_ATTR(lazy_itable_init);
+EXT4_INFO_ATTR(batched_discard);
+
+static struct attribute *ext4_feat_attrs[] = {
+ ATTR_LIST(lazy_itable_init),
+ ATTR_LIST(batched_discard),
+ NULL,
+};
+
static ssize_t ext4_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -2437,7 +2502,6 @@ static void ext4_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
-
static const struct sysfs_ops ext4_attr_ops = {
.show = ext4_attr_show,
.store = ext4_attr_store,
@@ -2449,6 +2513,17 @@ static struct kobj_type ext4_ktype = {
.release = ext4_sb_release,
};
+/* kobject release callback: completes ext4_feat->f_kobj_unregister. */
+static void ext4_feat_release(struct kobject *kobj)
+{
+ complete(&ext4_feat->f_kobj_unregister);
+}
+
+/*
+ * kobj_type exposing ext4_feat_attrs through the ext4 sysfs ops, with
+ * ext4_feat_release() as its release callback.
+ */
+static struct kobj_type ext4_feat_ktype = {
+ .default_attrs = ext4_feat_attrs,
+ .sysfs_ops = &ext4_attr_ops,
+ .release = ext4_feat_release,
+};
+
/*
* Check whether this filesystem can be mounted based on
* the features present and the RDONLY/RDWR mount requested.
@@ -2539,6 +2614,371 @@ static void print_daily_error_info(unsigned long arg)
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
}
+static void ext4_lazyinode_timeout(unsigned long data)
+{
+ struct task_struct *p = (struct task_struct *)data;
+ wake_up_process(p);
+}
+
+/* Find next suitable group and run ext4_init_inode_table */
+static int ext4_run_li_request(struct ext4_li_request *elr)
+{
+ struct ext4_group_desc *gdp = NULL;
+ ext4_group_t group, ngroups;
+ struct super_block *sb;
+ unsigned long timeout = 0;
+ int ret = 0;
+
+ sb = elr->lr_super;
+ ngroups = EXT4_SB(sb)->s_groups_count;
+
+ for (group = elr->lr_next_group; group < ngroups; group++) {
+ gdp = ext4_get_group_desc(sb, group, NULL);
+ if (!gdp) {
+ ret = 1;
+ break;
+ }
+
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ break;
+ }
+
+ if (group == ngroups)
+ ret = 1;
+
+ if (!ret) {
+ timeout = jiffies;
+ ret = ext4_init_inode_table(sb, group,
+ elr->lr_timeout ? 0 : 1);
+ if (elr->lr_timeout == 0) {
+ timeout = jiffies - timeout;
+ if (elr->lr_sbi->s_li_wait_mult)
+ timeout *= elr->lr_sbi->s_li_wait_mult;
+ else
+ timeout *= 20;
+ elr->lr_timeout = timeout;
+ }
+ elr->lr_next_sched = jiffies + elr->lr_timeout;
+ elr->lr_next_group = group + 1;
+ }
+
+ return ret;
+}
+
+/*
+ * Remove lr_request from the list_request and free the
+ * request structure. Should be called with li_list_mtx held
+ */
+static void ext4_remove_li_request(struct ext4_li_request *elr)
+{
+ struct ext4_sb_info *sbi;
+
+ if (!elr)
+ return;
+
+ sbi = elr->lr_sbi;
+
+ list_del(&elr->lr_request);
+ sbi->s_li_request = NULL;
+ kfree(elr);
+}
+
+static void ext4_unregister_li_request(struct super_block *sb)
+{
+ struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
+
+ if (!ext4_li_info)
+ return;
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ ext4_remove_li_request(elr);
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+}
+
+/*
+ * This is the function where ext4lazyinit thread lives. It walks
+ * through the request list searching for next scheduled filesystem.
+ * When such a fs is found, run the lazy initialization request
+ * (ext4_run_li_request) and keep track of the time spent in this
+ * function. Based on that time we compute next schedule time of
+ * the request. When walking through the list is complete, compute
+ * next waking time and put itself into sleep.
+ */
+static int ext4_lazyinit_thread(void *arg)
+{
+ struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
+ struct list_head *pos, *n;
+ struct ext4_li_request *elr;
+ unsigned long next_wakeup;
+ DEFINE_WAIT(wait);
+
+ BUG_ON(NULL == eli);
+
+ eli->li_timer.data = (unsigned long)current;
+ eli->li_timer.function = ext4_lazyinode_timeout;
+
+ eli->li_task = current;
+ wake_up(&eli->li_wait_task);
+
+cont_thread:
+ while (true) {
+ next_wakeup = MAX_JIFFY_OFFSET;
+
+ mutex_lock(&eli->li_list_mtx);
+ if (list_empty(&eli->li_request_list)) {
+ mutex_unlock(&eli->li_list_mtx);
+ goto exit_thread;
+ }
+
+ list_for_each_safe(pos, n, &eli->li_request_list) {
+ elr = list_entry(pos, struct ext4_li_request,
+ lr_request);
+
+ if (time_after_eq(jiffies, elr->lr_next_sched)) {
+ if (ext4_run_li_request(elr) != 0) {
+ /* error, remove the lazy_init job */
+ ext4_remove_li_request(elr);
+ continue;
+ }
+ }
+
+ if (time_before(elr->lr_next_sched, next_wakeup))
+ next_wakeup = elr->lr_next_sched;
+ }
+ mutex_unlock(&eli->li_list_mtx);
+
+ if (freezing(current))
+ refrigerator();
+
+ if ((time_after_eq(jiffies, next_wakeup)) ||
+ (MAX_JIFFY_OFFSET == next_wakeup)) {
+ cond_resched();
+ continue;
+ }
+
+ eli->li_timer.expires = next_wakeup;
+ add_timer(&eli->li_timer);
+ prepare_to_wait(&eli->li_wait_daemon, &wait,
+ TASK_INTERRUPTIBLE);
+ if (time_before(jiffies, next_wakeup))
+ schedule();
+ finish_wait(&eli->li_wait_daemon, &wait);
+ }
+
+exit_thread:
+ /*
+ * It looks like the request list is empty, but we need
+ * to check it under the li_list_mtx lock, to prevent any
+ * additions into it, and of course we should lock ext4_li_mtx
+ * to atomically free the list and ext4_li_info, because at
+ * this point another ext4 filesystem could be registering
+ * a new request.
+ */
+ mutex_lock(&ext4_li_mtx);
+ mutex_lock(&eli->li_list_mtx);
+ if (!list_empty(&eli->li_request_list)) {
+ mutex_unlock(&eli->li_list_mtx);
+ mutex_unlock(&ext4_li_mtx);
+ goto cont_thread;
+ }
+ mutex_unlock(&eli->li_list_mtx);
+ del_timer_sync(&ext4_li_info->li_timer);
+ eli->li_task = NULL;
+ wake_up(&eli->li_wait_task);
+
+ kfree(ext4_li_info);
+ ext4_li_info = NULL;
+ mutex_unlock(&ext4_li_mtx);
+
+ return 0;
+}
+
+static void ext4_clear_request_list(void)
+{
+ struct list_head *pos, *n;
+ struct ext4_li_request *elr;
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ if (list_empty(&ext4_li_info->li_request_list))
+ return;
+
+ list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
+ elr = list_entry(pos, struct ext4_li_request,
+ lr_request);
+ ext4_remove_li_request(elr);
+ }
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+}
+
+static int ext4_run_lazyinit_thread(void)
+{
+ struct task_struct *t;
+
+ t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
+ if (IS_ERR(t)) {
+ int err = PTR_ERR(t);
+ ext4_clear_request_list();
+ del_timer_sync(&ext4_li_info->li_timer);
+ kfree(ext4_li_info);
+ ext4_li_info = NULL;
+ printk(KERN_CRIT "EXT4: error %d creating inode table "
+ "initialization thread\n",
+ err);
+ return err;
+ }
+ ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
+
+ wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
+ return 0;
+}
+
+/*
+ * Check whether it makes sense to run the itable init thread or not.
+ * If there is at least one uninitialized inode table, return the
+ * corresponding group number, else the loop goes through all
+ * groups and returns the total number of groups.
+ */
+static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
+{
+ ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
+ struct ext4_group_desc *gdp = NULL;
+
+ for (group = 0; group < ngroups; group++) {
+ gdp = ext4_get_group_desc(sb, group, NULL);
+ if (!gdp)
+ continue;
+
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ break;
+ }
+
+ return group;
+}
+
+static int ext4_li_info_new(void)
+{
+ struct ext4_lazy_init *eli = NULL;
+
+ eli = kzalloc(sizeof(*eli), GFP_KERNEL);
+ if (!eli)
+ return -ENOMEM;
+
+ eli->li_task = NULL;
+ INIT_LIST_HEAD(&eli->li_request_list);
+ mutex_init(&eli->li_list_mtx);
+
+ init_waitqueue_head(&eli->li_wait_daemon);
+ init_waitqueue_head(&eli->li_wait_task);
+ init_timer(&eli->li_timer);
+ eli->li_state |= EXT4_LAZYINIT_QUIT;
+
+ ext4_li_info = eli;
+
+ return 0;
+}
+
+static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
+ ext4_group_t start)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_li_request *elr;
+ unsigned long rnd;
+
+ elr = kzalloc(sizeof(*elr), GFP_KERNEL);
+ if (!elr)
+ return NULL;
+
+ elr->lr_super = sb;
+ elr->lr_sbi = sbi;
+ elr->lr_next_group = start;
+
+ /*
+ * Randomize first schedule time of the request to
+ * spread the inode table initialization requests
+ * better.
+ */
+ get_random_bytes(&rnd, sizeof(rnd));
+ elr->lr_next_sched = jiffies + (unsigned long)rnd %
+ (EXT4_DEF_LI_MAX_START_DELAY * HZ);
+
+ return elr;
+}
+
+static int ext4_register_li_request(struct super_block *sb,
+ ext4_group_t first_not_zeroed)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_li_request *elr;
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ int ret;
+
+ if (sbi->s_li_request != NULL)
+ return 0;
+
+ if (first_not_zeroed == ngroups ||
+ (sb->s_flags & MS_RDONLY) ||
+ !test_opt(sb, INIT_INODE_TABLE)) {
+ sbi->s_li_request = NULL;
+ return 0;
+ }
+
+ if (first_not_zeroed == ngroups) {
+ sbi->s_li_request = NULL;
+ return 0;
+ }
+
+ elr = ext4_li_request_new(sb, first_not_zeroed);
+ if (!elr)
+ return -ENOMEM;
+
+ mutex_lock(&ext4_li_mtx);
+
+ if (NULL == ext4_li_info) {
+ ret = ext4_li_info_new();
+ if (ret)
+ goto out;
+ }
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ list_add(&elr->lr_request, &ext4_li_info->li_request_list);
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+
+ sbi->s_li_request = elr;
+
+ if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
+ ret = ext4_run_lazyinit_thread();
+ if (ret)
+ goto out;
+ }
+out:
+ mutex_unlock(&ext4_li_mtx);
+ if (ret)
+ kfree(elr);
+ return ret;
+}
+
+/*
+ * We do not need to lock anything since this is called on
+ * module unload.
+ */
+static void ext4_destroy_lazyinit_thread(void)
+{
+ /*
+ * If thread exited earlier
+ * there's nothing to be done.
+ */
+ if (!ext4_li_info)
+ return;
+
+ ext4_clear_request_list();
+
+ while (ext4_li_info->li_task) {
+ wake_up(&ext4_li_info->li_wait_daemon);
+ wait_event(ext4_li_info->li_wait_task,
+ ext4_li_info->li_task == NULL);
+ }
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock)
__acquires(kernel_lock)
@@ -2564,6 +3004,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__u64 blocks_count;
int err;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
+ ext4_group_t first_not_zeroed;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
@@ -2624,6 +3065,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
+ set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
set_opt(sbi->s_mount_opt, DEBUG);
if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
@@ -2901,7 +3343,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
- if (!ext4_check_descriptors(sb)) {
+ if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
goto failed_mount2;
}
@@ -2917,6 +3359,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock);
+ err = percpu_counter_init(&sbi->s_freeblocks_counter,
+ ext4_count_free_blocks(sb));
+ if (!err) {
+ err = percpu_counter_init(&sbi->s_freeinodes_counter,
+ ext4_count_free_inodes(sb));
+ }
+ if (!err) {
+ err = percpu_counter_init(&sbi->s_dirs_counter,
+ ext4_count_dirs(sb));
+ }
+ if (!err) {
+ err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+ }
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "insufficient memory");
+ goto failed_mount3;
+ }
+
sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_max_writeback_mb_bump = 128;
@@ -3015,22 +3475,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
-no_journal:
- err = percpu_counter_init(&sbi->s_freeblocks_counter,
- ext4_count_free_blocks(sb));
- if (!err)
- err = percpu_counter_init(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- if (!err)
- err = percpu_counter_init(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- if (!err)
- err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
- if (err) {
- ext4_msg(sb, KERN_ERR, "insufficient memory");
- goto failed_mount_wq;
- }
+ /*
+ * The journal may have updated the bg summary counts, so we
+ * need to update the global counters.
+ */
+ percpu_counter_set(&sbi->s_freeblocks_counter,
+ ext4_count_free_blocks(sb));
+ percpu_counter_set(&sbi->s_freeinodes_counter,
+ ext4_count_free_inodes(sb));
+ percpu_counter_set(&sbi->s_dirs_counter,
+ ext4_count_dirs(sb));
+ percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
+no_journal:
EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3122,6 +3579,10 @@ no_journal:
goto failed_mount4;
}
+ err = ext4_register_li_request(sb, first_not_zeroed);
+ if (err)
+ goto failed_mount4;
+
sbi->s_kobj.kset = ext4_kset;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
@@ -3176,10 +3637,6 @@ failed_mount_wq:
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
}
- percpu_counter_destroy(&sbi->s_freeblocks_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
failed_mount3:
if (sbi->s_flex_groups) {
if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3187,6 +3644,10 @@ failed_mount3:
else
kfree(sbi->s_flex_groups);
}
+ percpu_counter_destroy(&sbi->s_freeblocks_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+ percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
@@ -3461,7 +3922,7 @@ static int ext4_load_journal(struct super_block *sb,
EXT4_SB(sb)->s_journal = journal;
ext4_clear_journal_err(sb, es);
- if (journal_devnum &&
+ if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
@@ -3515,9 +3976,10 @@ static int ext4_commit_super(struct super_block *sb, int sync)
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeblocks_counter));
- es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeinodes_counter));
+ &EXT4_SB(sb)->s_freeblocks_counter));
+ es->s_free_inodes_count =
+ cpu_to_le32(percpu_counter_sum_positive(
+ &EXT4_SB(sb)->s_freeinodes_counter));
sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
mark_buffer_dirty(sbh);
@@ -3835,6 +4297,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
enable_quota = 1;
}
}
+
+ /*
+ * Reinitialize lazy itable initialization thread based on
+ * current settings
+ */
+ if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
+ ext4_unregister_li_request(sb);
+ else {
+ ext4_group_t first_not_zeroed;
+ first_not_zeroed = ext4_has_uninit_itable(sb);
+ ext4_register_li_request(sb, first_not_zeroed);
+ }
+
ext4_setup_system_zone(sb);
if (sbi->s_journal == NULL)
ext4_commit_super(sb, 1);
@@ -4105,12 +4580,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
static int ext4_quota_off(struct super_block *sb, int type)
{
- /* Force all delayed allocation blocks to be allocated */
- if (test_opt(sb, DELALLOC)) {
- down_read(&sb->s_umount);
+ /* Force all delayed allocation blocks to be allocated.
+ * Caller already holds s_umount sem */
+ if (test_opt(sb, DELALLOC))
sync_filesystem(sb);
- up_read(&sb->s_umount);
- }
return dquot_quota_off(sb, type);
}
@@ -4216,17 +4689,17 @@ out:
#endif
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
}
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -4271,28 +4744,58 @@ static inline void unregister_as_ext3(void) { }
static struct file_system_type ext4_fs_type = {
.owner = THIS_MODULE,
.name = "ext4",
- .get_sb = ext4_get_sb,
+ .mount = ext4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
-static int __init init_ext4_fs(void)
+int __init ext4_init_feat_adverts(void)
+{
+ struct ext4_features *ef;
+ int ret = -ENOMEM;
+
+ ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
+ if (!ef)
+ goto out;
+
+ ef->f_kobj.kset = ext4_kset;
+ init_completion(&ef->f_kobj_unregister);
+ ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
+ "features");
+ if (ret) {
+ kfree(ef);
+ goto out;
+ }
+
+ ext4_feat = ef;
+ ret = 0;
+out:
+ return ret;
+}
+
+static int __init ext4_init_fs(void)
{
int err;
ext4_check_flag_values();
- err = init_ext4_system_zone();
+ err = ext4_init_pageio();
if (err)
return err;
+ err = ext4_init_system_zone();
+ if (err)
+ goto out5;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset)
goto out4;
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
- err = init_ext4_mballoc();
+
+ err = ext4_init_feat_adverts();
+
+ err = ext4_init_mballoc();
if (err)
goto out3;
- err = init_ext4_xattr();
+ err = ext4_init_xattr();
if (err)
goto out2;
err = init_inodecache();
@@ -4303,38 +4806,46 @@ static int __init init_ext4_fs(void)
err = register_filesystem(&ext4_fs_type);
if (err)
goto out;
+
+ ext4_li_info = NULL;
+ mutex_init(&ext4_li_mtx);
return 0;
out:
unregister_as_ext2();
unregister_as_ext3();
destroy_inodecache();
out1:
- exit_ext4_xattr();
+ ext4_exit_xattr();
out2:
- exit_ext4_mballoc();
+ ext4_exit_mballoc();
out3:
+ kfree(ext4_feat);
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
out4:
- exit_ext4_system_zone();
+ ext4_exit_system_zone();
+out5:
+ ext4_exit_pageio();
return err;
}
-static void __exit exit_ext4_fs(void)
+static void __exit ext4_exit_fs(void)
{
+ ext4_destroy_lazyinit_thread();
unregister_as_ext2();
unregister_as_ext3();
unregister_filesystem(&ext4_fs_type);
destroy_inodecache();
- exit_ext4_xattr();
- exit_ext4_mballoc();
+ ext4_exit_xattr();
+ ext4_exit_mballoc();
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
- exit_ext4_system_zone();
+ ext4_exit_system_zone();
+ ext4_exit_pageio();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
-module_init(init_ext4_fs)
-module_exit(exit_ext4_fs)
+module_init(ext4_init_fs)
+module_exit(ext4_exit_fs)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3a8cd8dff1ad..fa4b899da4b3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#undef BLOCK_HASH_SHIFT
int __init
-init_ext4_xattr(void)
+ext4_init_xattr(void)
{
ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
if (!ext4_xattr_cache)
@@ -1597,7 +1597,7 @@ init_ext4_xattr(void)
}
void
-exit_ext4_xattr(void)
+ext4_exit_xattr(void)
{
if (ext4_xattr_cache)
mb_cache_destroy(ext4_xattr_cache);
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 518e96e43905..1ef16520b950 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
-extern int init_ext4_xattr(void);
-extern void exit_ext4_xattr(void);
+extern int __init ext4_init_xattr(void);
+extern void ext4_exit_xattr(void);
extern const struct xattr_handler *ext4_xattr_handlers[];
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb)
{
}
-static inline int
-init_ext4_xattr(void)
+static __init inline int
+ext4_init_xattr(void)
{
return 0;
}
static inline void
-exit_ext4_xattr(void)
+ext4_exit_xattr(void)
{
}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index bbca5c186ae7..3345aabd1dd7 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -675,18 +675,17 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
-static int msdos_get_sb(struct file_system_type *fs_type,
+static struct dentry *msdos_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
}
static struct file_system_type msdos_fs_type = {
.owner = THIS_MODULE,
.name = "msdos",
- .get_sb = msdos_get_sb,
+ .mount = msdos_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6f0f6c9a0152..b936703b8924 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1071,18 +1071,17 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
-static int vfat_get_sb(struct file_system_type *fs_type,
+static struct dentry *vfat_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
}
static struct file_system_type vfat_fs_type = {
.owner = THIS_MODULE,
.name = "vfat",
- .get_sb = vfat_get_sb,
+ .mount = vfat_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f8cc34f542c3..ecc8b3954ed6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -640,7 +640,7 @@ static void fasync_free_rcu(struct rcu_head *head)
* match the state "is the filp on a fasync list".
*
*/
-static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
+int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
struct fasync_struct *fa, **fp;
int result = 0;
@@ -666,21 +666,31 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
return result;
}
+struct fasync_struct *fasync_alloc(void)
+{
+ return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
+}
+
/*
- * Add a fasync entry. Return negative on error, positive if
- * added, and zero if did nothing but change an existing one.
+ * NOTE! This can be used only for unused fasync entries:
+ * entries that actually got inserted on the fasync list
+ * need to be released by rcu - see fasync_remove_entry.
+ */
+void fasync_free(struct fasync_struct *new)
+{
+ kmem_cache_free(fasync_cache, new);
+}
+
+/*
+ * Insert a new entry into the fasync list. Return the pointer to the
+ * old one if we didn't use the new one.
*
* NOTE! It is very important that the FASYNC flag always
* match the state "is the filp on a fasync list".
*/
-static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
+struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
- struct fasync_struct *new, *fa, **fp;
- int result = 0;
-
- new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
- if (!new)
- return -ENOMEM;
+ struct fasync_struct *fa, **fp;
spin_lock(&filp->f_lock);
spin_lock(&fasync_lock);
@@ -691,8 +701,6 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
spin_lock_irq(&fa->fa_lock);
fa->fa_fd = fd;
spin_unlock_irq(&fa->fa_lock);
-
- kmem_cache_free(fasync_cache, new);
goto out;
}
@@ -702,13 +710,39 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
new->fa_fd = fd;
new->fa_next = *fapp;
rcu_assign_pointer(*fapp, new);
- result = 1;
filp->f_flags |= FASYNC;
out:
spin_unlock(&fasync_lock);
spin_unlock(&filp->f_lock);
- return result;
+ return fa;
+}
+
+/*
+ * Add a fasync entry. Return negative on error, positive if
+ * added, and zero if did nothing but change an existing one.
+ */
+static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
+{
+ struct fasync_struct *new;
+
+ new = fasync_alloc();
+ if (!new)
+ return -ENOMEM;
+
+ /*
+ * fasync_insert_entry() returns the old (update) entry if
+ * it existed.
+ *
+ * So free the (unused) new entry and return 0 to let the
+ * caller know that we didn't add any new fasync entries.
+ */
+ if (fasync_insert_entry(fd, filp, fapp, new)) {
+ fasync_free(new);
+ return 0;
+ }
+
+ return 1;
}
/*
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 71b0148b8784..9d1c99558389 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -246,17 +246,16 @@ out:
/*
* The usual module blurb.
*/
-static int vxfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *vxfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
}
static struct file_system_type vxfs_fs_type = {
.owner = THIS_MODULE,
.name = "vxfs",
- .get_sb = vxfs_get_sb,
+ .mount = vxfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index aed881a76b22..3d06ccc953aa 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -707,6 +707,17 @@ get_next_work_item(struct backing_dev_info *bdi)
return work;
}
+/*
+ * Add in the number of potentially dirty inodes, because each inode
+ * write can dirty pagecache in the underlying blockdev.
+ */
+static unsigned long get_nr_dirty_pages(void)
+{
+ return global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS) +
+ get_nr_dirty_inodes();
+}
+
static long wb_check_old_data_flush(struct bdi_writeback *wb)
{
unsigned long expired;
@@ -724,13 +735,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
return 0;
wb->last_old_flush = jiffies;
- /*
- * Add in the number of potentially dirty inodes, because each inode
- * write can dirty pagecache in the underlying blockdev.
- */
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS) +
- get_nr_dirty_inodes();
+ nr_pages = get_nr_dirty_pages();
if (nr_pages) {
struct wb_writeback_work work = {
@@ -1076,32 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
}
/**
- * writeback_inodes_sb - writeback dirty inodes from given super_block
+ * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
* @sb: the superblock
+ * @nr: the number of pages to write
*
* Start writeback on some inodes on this super_block. No guarantees are made
* on how many (if any) will be written, and this function does not wait
- * for IO completion of submitted IO. The number of pages submitted is
- * returned.
+ * for IO completion of submitted IO.
*/
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
{
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
.done = &done,
+ .nr_pages = nr,
};
WARN_ON(!rwsem_is_locked(&sb->s_umount));
-
- work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
-
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
}
+EXPORT_SYMBOL(writeback_inodes_sb_nr);
+
+/**
+ * writeback_inodes_sb - writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO.
+ */
+void writeback_inodes_sb(struct super_block *sb)
+{
+ return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+}
EXPORT_SYMBOL(writeback_inodes_sb);
/**
@@ -1124,6 +1139,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
/**
+ * writeback_inodes_sb_nr_if_idle - start writeback if none underway
+ * @sb: the superblock
+ * @nr: the number of pages to write
+ *
+ * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
+ unsigned long nr)
+{
+ if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
+ writeback_inodes_sb_nr(sb, nr);
+ up_read(&sb->s_umount);
+ return 1;
+ } else
+ return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
+
+/**
* sync_inodes_sb - sync sb inode pages
* @sb: the superblock
*
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 4eba07661e5c..85542a7daf40 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -322,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
-static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
{
- return get_sb_single(fs_type, flags, raw_data,
- fuse_ctl_fill_super, mnt);
+ return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
}
static void fuse_ctl_kill_sb(struct super_block *sb)
@@ -346,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
static struct file_system_type fuse_ctl_fs_type = {
.owner = THIS_MODULE,
.name = "fusectl",
- .get_sb = fuse_ctl_get_sb,
+ .mount = fuse_ctl_mount,
.kill_sb = fuse_ctl_kill_sb,
};
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b98664275f02..6e07696308dc 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1334,12 +1334,7 @@ out_finish:
static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
{
- int i;
-
- for (i = 0; i < req->num_pages; i++) {
- struct page *page = req->pages[i];
- page_cache_release(page);
- }
+ release_pages(req->pages, req->num_pages, 0);
}
static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index da9e6e11374c..cfce3ad86a92 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1041,11 +1041,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
return err;
}
-static int fuse_get_sb(struct file_system_type *fs_type,
+static struct dentry *fuse_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *raw_data, struct vfsmount *mnt)
+ void *raw_data)
{
- return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
+ return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
}
static void fuse_kill_sb_anon(struct super_block *sb)
@@ -1065,17 +1065,16 @@ static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
.name = "fuse",
.fs_flags = FS_HAS_SUBTYPE,
- .get_sb = fuse_get_sb,
+ .mount = fuse_mount,
.kill_sb = fuse_kill_sb_anon,
};
#ifdef CONFIG_BLOCK
-static int fuse_get_sb_blk(struct file_system_type *fs_type,
+static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *raw_data, struct vfsmount *mnt)
+ void *raw_data)
{
- return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
}
static void fuse_kill_sb_blk(struct super_block *sb)
@@ -1094,7 +1093,7 @@ static void fuse_kill_sb_blk(struct super_block *sb)
static struct file_system_type fuseblk_fs_type = {
.owner = THIS_MODULE,
.name = "fuseblk",
- .get_sb = fuse_get_sb_blk,
+ .mount = fuse_mount_blk,
.kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cade1acbcea9..3eb1393f7b81 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1250,12 +1250,11 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
}
/**
- * gfs2_get_sb - Get the GFS2 superblock
+ * gfs2_mount - Get the GFS2 superblock
* @fs_type: The GFS2 filesystem type
* @flags: Mount flags
* @dev_name: The name of the device
* @data: The mount arguments
- * @mnt: The vfsmnt for this mount
*
* Q. Why not use get_sb_bdev() ?
* A. We need to select one of two root directories to mount, independent
@@ -1264,8 +1263,8 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
* Returns: 0 or -ve on error
*/
-static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct block_device *bdev;
struct super_block *s;
@@ -1279,7 +1278,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
bdev = open_bdev_exclusive(dev_name, mode, fs_type);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ return ERR_CAST(bdev);
/*
* once the super is inserted into the list by sget, s_umount
@@ -1298,6 +1297,9 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
if (IS_ERR(s))
goto error_bdev;
+ if (s->s_root)
+ close_bdev_exclusive(bdev, mode);
+
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
args.ar_data = GFS2_DATA_DEFAULT;
@@ -1309,17 +1311,13 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
error = gfs2_mount_args(&args, data);
if (error) {
printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
- if (s->s_root)
- goto error_super;
- deactivate_locked_super(s);
- return error;
+ goto error_super;
}
if (s->s_root) {
error = -EBUSY;
if ((flags ^ s->s_flags) & MS_RDONLY)
goto error_super;
- close_bdev_exclusive(bdev, mode);
} else {
char b[BDEVNAME_SIZE];
@@ -1328,27 +1326,24 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
- if (error) {
- deactivate_locked_super(s);
- return error;
- }
+ if (error)
+ goto error_super;
s->s_flags |= MS_ACTIVE;
bdev->bd_super = s;
}
sdp = s->s_fs_info;
- mnt->mnt_sb = s;
if (args.ar_meta)
- mnt->mnt_root = dget(sdp->sd_master_dir);
+ return dget(sdp->sd_master_dir);
else
- mnt->mnt_root = dget(sdp->sd_root_dir);
- return 0;
+ return dget(sdp->sd_root_dir);
error_super:
deactivate_locked_super(s);
+ return ERR_PTR(error);
error_bdev:
close_bdev_exclusive(bdev, mode);
- return error;
+ return ERR_PTR(error);
}
static int set_meta_super(struct super_block *s, void *ptr)
@@ -1356,8 +1351,8 @@ static int set_meta_super(struct super_block *s, void *ptr)
return -EINVAL;
}
-static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
struct super_block *s;
struct gfs2_sbd *sdp;
@@ -1368,23 +1363,21 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
if (error) {
printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
dev_name, error);
- return error;
+ return ERR_PTR(error);
}
s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
path.dentry->d_inode->i_sb->s_bdev);
path_put(&path);
if (IS_ERR(s)) {
printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
- return PTR_ERR(s);
+ return ERR_CAST(s);
}
if ((flags ^ s->s_flags) & MS_RDONLY) {
deactivate_locked_super(s);
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
}
sdp = s->s_fs_info;
- mnt->mnt_sb = s;
- mnt->mnt_root = dget(sdp->sd_master_dir);
- return 0;
+ return dget(sdp->sd_master_dir);
}
static void gfs2_kill_sb(struct super_block *sb)
@@ -1410,7 +1403,7 @@ static void gfs2_kill_sb(struct super_block *sb)
struct file_system_type gfs2_fs_type = {
.name = "gfs2",
.fs_flags = FS_REQUIRES_DEV,
- .get_sb = gfs2_get_sb,
+ .mount = gfs2_mount,
.kill_sb = gfs2_kill_sb,
.owner = THIS_MODULE,
};
@@ -1418,7 +1411,7 @@ struct file_system_type gfs2_fs_type = {
struct file_system_type gfs2meta_fs_type = {
.name = "gfs2meta",
.fs_flags = FS_REQUIRES_DEV,
- .get_sb = gfs2_get_sb_meta,
+ .mount = gfs2_mount_meta,
.owner = THIS_MODULE,
};
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6ee1586f2334..4824c27cebb8 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -441,17 +441,16 @@ bail:
return res;
}
-static int hfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *hfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
}
static struct file_system_type hfs_fs_type = {
.owner = THIS_MODULE,
.name = "hfs",
- .get_sb = hfs_get_sb,
+ .mount = hfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index e318bbc0daf6..9d59c0571f59 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -317,8 +317,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
res = hfsplus_rename_cat(inode->i_ino,
dir, &dentry->d_name,
sbi->hidden_dir, &str);
- if (!res)
+ if (!res) {
inode->i_flags |= S_DEAD;
+ drop_nlink(inode);
+ }
goto out;
}
res = hfsplus_delete_cat(cnid, dir, &dentry->d_name);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5b4667e08ef7..40a85a3ded6e 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -92,7 +92,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
mark_inode_dirty(inode);
out_unlock_inode:
- mutex_lock(&inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
out_drop_write:
mnt_drop_write(file->f_path.mnt);
out:
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a88d7536103..52cc746d3ba3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -495,18 +495,16 @@ static void hfsplus_destroy_inode(struct inode *inode)
#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
-static int hfsplus_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
}
static struct file_system_type hfsplus_fs_type = {
.owner = THIS_MODULE,
.name = "hfsplus",
- .get_sb = hfsplus_get_sb,
+ .mount = hfsplus_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index cd7c93917cc7..2c0f148a49e6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -962,11 +962,11 @@ out:
return err;
}
-static int hostfs_read_sb(struct file_system_type *type,
+static struct dentry *hostfs_read_sb(struct file_system_type *type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
+ return mount_nodev(type, flags, data, hostfs_fill_sb_common);
}
static void hostfs_kill_sb(struct super_block *s)
@@ -978,7 +978,7 @@ static void hostfs_kill_sb(struct super_block *s)
static struct file_system_type hostfs_type = {
.owner = THIS_MODULE,
.name = "hostfs",
- .get_sb = hostfs_read_sb,
+ .mount = hostfs_read_sb,
.kill_sb = hostfs_kill_sb,
.fs_flags = 0,
};
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index eac5f96323e3..793cb9d943d2 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -14,7 +14,7 @@ void hpfs_lock_creation(struct super_block *s)
#ifdef DEBUG_LOCKS
printk("lock creation\n");
#endif
- down(&hpfs_sb(s)->hpfs_creation_de);
+ mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
}
void hpfs_unlock_creation(struct super_block *s)
@@ -22,7 +22,7 @@ void hpfs_unlock_creation(struct super_block *s)
#ifdef DEBUG_LOCKS
printk("unlock creation\n");
#endif
- up(&hpfs_sb(s)->hpfs_creation_de);
+ mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
}
/* Map a sector into a buffer and return pointers to it and to the buffer. */
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index b59eac0232a0..2fee17d0d9ab 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -87,7 +87,7 @@ struct hpfs_sb_info {
unsigned *sb_bmp_dir; /* main bitmap directory */
unsigned sb_c_bitmap; /* current bitmap */
unsigned sb_max_fwd_alloc; /* max forwad allocation */
- struct semaphore hpfs_creation_de; /* when creating dirents, nobody else
+ struct mutex hpfs_creation_de; /* when creating dirents, nobody else
can alloc blocks */
/*unsigned sb_mounting : 1;*/
int sb_timeshift;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c969a1aa163a..6c5f01597c3a 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -491,7 +491,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
sbi->sb_bmp_dir = NULL;
sbi->sb_cp_table = NULL;
- init_MUTEX(&sbi->hpfs_creation_de);
+ mutex_init(&sbi->hpfs_creation_de);
uid = current_uid();
gid = current_gid();
@@ -686,17 +686,16 @@ bail0:
return -EINVAL;
}
-static int hpfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *hpfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
}
static struct file_system_type hpfs_fs_type = {
.owner = THIS_MODULE,
.name = "hpfs",
- .get_sb = hpfs_get_sb,
+ .mount = hpfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 4e2a45ea6140..f702b5f713fc 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -748,17 +748,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
return(err);
}
-static int hppfs_read_super(struct file_system_type *type,
+static struct dentry *hppfs_read_super(struct file_system_type *type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt);
+ return mount_nodev(type, flags, data, hppfs_fill_super);
}
static struct file_system_type hppfs_type = {
.owner = THIS_MODULE,
.name = "hppfs",
- .get_sb = hppfs_read_super,
+ .mount = hppfs_read_super,
.kill_sb = kill_anon_super,
.fs_flags = 0,
};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b14be3f781c7..a5fe68189eed 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -896,15 +896,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta)
}
}
-static int hugetlbfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
}
static struct file_system_type hugetlbfs_fs_type = {
.name = "hugetlbfs",
- .get_sb = hugetlbfs_get_sb,
+ .mount = hugetlbfs_mount,
.kill_sb = kill_litter_super,
};
@@ -932,8 +932,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
*user = current_user();
if (user_shm_lock(size, *user)) {
- WARN_ONCE(1,
- "Using mlock ulimits for SHM_HUGETLB deprecated\n");
+ printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
} else {
*user = NULL;
return ERR_PTR(-EPERM);
diff --git a/fs/internal.h b/fs/internal.h
index ebad3b90752d..e43b9a4dbf4e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,5 +106,5 @@ extern void release_open_intent(struct nameidata *);
* inode.c
*/
extern int get_nr_dirty_inodes(void);
-extern int evict_inodes(struct super_block *);
+extern void evict_inodes(struct super_block *);
extern int invalidate_inodes(struct super_block *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f855ea4fc888..e92fdbb3bc3a 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp)
return thaw_super(sb);
}
+static int ioctl_fstrim(struct file *filp, void __user *argp)
+{
+ struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+ struct fstrim_range range;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* If filesystem doesn't support trim feature, return. */
+ if (sb->s_op->trim_fs == NULL)
+ return -EOPNOTSUPP;
+
+ /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
+ if (sb->s_bdev == NULL)
+ return -EINVAL;
+
+ if (argp == NULL) {
+ range.start = 0;
+ range.len = ULLONG_MAX;
+ range.minlen = 0;
+ } else if (copy_from_user(&range, argp, sizeof(range)))
+ return -EFAULT;
+
+ ret = sb->s_op->trim_fs(sb, &range);
+ if (ret < 0)
+ return ret;
+
+ if ((argp != NULL) &&
+ (copy_to_user(argp, &range, sizeof(range))))
+ return -EFAULT;
+
+ return 0;
+}
+
/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
@@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
error = ioctl_fsthaw(filp);
break;
+ case FITRIM:
+ error = ioctl_fstrim(filp, argp);
+ break;
+
case FS_IOC_FIEMAP:
return ioctl_fiemap(filp, arg);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb92dcc6..2f7d05c89922 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -111,12 +111,14 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
read_lock(&tasklist_lock);
switch (which) {
case IOPRIO_WHO_PROCESS:
+ rcu_read_lock();
if (!who)
p = current;
else
p = find_task_by_vpid(who);
if (p)
ret = set_task_ioprio(p, ioprio);
+ rcu_read_unlock();
break;
case IOPRIO_WHO_PGRP:
if (!who)
@@ -139,7 +141,12 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
break;
do_each_thread(g, p) {
- if (__task_cred(p)->uid != who)
+ int match;
+
+ rcu_read_lock();
+ match = __task_cred(p)->uid == who;
+ rcu_read_unlock();
+ if (!match)
continue;
ret = set_task_ioprio(p, ioprio);
if (ret)
@@ -200,12 +207,14 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
read_lock(&tasklist_lock);
switch (which) {
case IOPRIO_WHO_PROCESS:
+ rcu_read_lock();
if (!who)
p = current;
else
p = find_task_by_vpid(who);
if (p)
ret = get_task_ioprio(p);
+ rcu_read_unlock();
break;
case IOPRIO_WHO_PGRP:
if (!who)
@@ -232,7 +241,12 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
break;
do_each_thread(g, p) {
- if (__task_cred(p)->uid != user->uid)
+ int match;
+
+ rcu_read_lock();
+ match = __task_cred(p)->uid == user->uid;
+ rcu_read_unlock();
+ if (!match)
continue;
tmpio = get_task_ioprio(p);
if (tmpio < 0)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 60c2b944d762..bfdeb82a53be 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -544,6 +544,34 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
}
/*
+ * Check if root directory is empty (has less than 3 files).
+ *
+ * Used to detect broken CDs where ISO root directory is empty but Joliet root
+ * directory is OK. If such CD has Rock Ridge extensions, they will be disabled
+ * (and Joliet used instead) or else no files would be visible.
+ */
+static bool rootdir_empty(struct super_block *sb, unsigned long block)
+{
+ int offset = 0, files = 0, de_len;
+ struct iso_directory_record *de;
+ struct buffer_head *bh;
+
+ bh = sb_bread(sb, block);
+ if (!bh)
+ return true;
+ while (files < 3) {
+ de = (struct iso_directory_record *) (bh->b_data + offset);
+ de_len = *(unsigned char *) de;
+ if (de_len == 0)
+ break;
+ files++;
+ offset += de_len;
+ }
+ brelse(bh);
+ return files < 3;
+}
+
+/*
* Initialize the superblock and read the root inode.
*
* Note: a check_disk_change() has been done immediately prior
@@ -843,6 +871,18 @@ root_found:
goto out_no_root;
/*
+ * Fix for broken CDs with Rock Ridge and empty ISO root directory but
+ * correct Joliet root directory.
+ */
+ if (sbi->s_rock == 1 && joliet_level &&
+ rootdir_empty(s, sbi->s_firstdatazone)) {
+ printk(KERN_NOTICE
+ "ISOFS: primary root directory is empty. "
+ "Disabling Rock Ridge and switching to Joliet.");
+ sbi->s_rock = 0;
+ }
+
+ /*
* If this disk has both Rock Ridge and Joliet on it, then we
* want to use Rock Ridge by default. This can be overridden
* by using the norock mount option. There is still one other
@@ -1467,17 +1507,16 @@ struct inode *isofs_iget(struct super_block *sb,
return inode;
}
-static int isofs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *isofs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
}
static struct file_system_type iso9660_fs_type = {
.owner = THIS_MODULE,
.name = "iso9660",
- .get_sb = isofs_get_sb,
+ .mount = isofs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 05a38b9c4c0e..e4b87bc1fa56 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -221,7 +221,7 @@ restart:
goto restart;
}
if (buffer_locked(bh)) {
- atomic_inc(&bh->b_count);
+ get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
@@ -283,7 +283,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
int ret = 0;
if (buffer_locked(bh)) {
- atomic_inc(&bh->b_count);
+ get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 85a6883c0aca..34a4861c14b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -587,13 +587,13 @@ void journal_commit_transaction(journal_t *journal)
/* Bump b_count to prevent truncate from stumbling over
the shadowed buffer! @@@ This can go if we ever get
rid of the BJ_IO/BJ_Shadow pairing of buffers. */
- atomic_inc(&jh2bh(jh)->b_count);
+ get_bh(jh2bh(jh));
/* Make a temporary IO buffer with which to write it out
(this will requeue both the metadata buffer and the
temporary IO buffer). new_bh goes on BJ_IO*/
- set_bit(BH_JWrite, &jh2bh(jh)->b_state);
+ set_buffer_jwrite(jh2bh(jh));
/*
* akpm: journal_write_metadata_buffer() sets
* new_bh->b_transaction to commit_transaction.
@@ -603,7 +603,7 @@ void journal_commit_transaction(journal_t *journal)
JBUFFER_TRACE(jh, "ph3: write metadata");
flags = journal_write_metadata_buffer(commit_transaction,
jh, &new_jh, blocknr);
- set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
+ set_buffer_jwrite(jh2bh(new_jh));
wbuf[bufs++] = jh2bh(new_jh);
/* Record the new block's tag in the current descriptor
@@ -713,7 +713,7 @@ wait_for_iobuf:
shadowed buffer */
jh = commit_transaction->t_shadow_list->b_tprev;
bh = jh2bh(jh);
- clear_bit(BH_JWrite, &bh->b_state);
+ clear_buffer_jwrite(bh);
J_ASSERT_BH(bh, buffer_jbddirty(bh));
/* The metadata is now released for reuse, but we need
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 2c4b1f109da9..da1b5e4ffce1 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -36,6 +36,7 @@
#include <linux/poison.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
+#include <linux/ratelimit.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -84,6 +85,7 @@ EXPORT_SYMBOL(journal_force_commit);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
+static const char *journal_dev_name(journal_t *journal, char *buffer);
/*
* Helper function used to manage commit timeouts
@@ -439,7 +441,7 @@ int __log_start_commit(journal_t *journal, tid_t target)
*/
if (!tid_geq(journal->j_commit_request, target)) {
/*
- * We want a new commit: OK, mark the request and wakup the
+ * We want a new commit: OK, mark the request and wakeup the
* commit thread. We do _not_ do the commit ourselves.
*/
@@ -950,6 +952,8 @@ int journal_create(journal_t *journal)
if (err)
return err;
bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+ if (unlikely(!bh))
+ return -ENOMEM;
lock_buffer(bh);
memset (bh->b_data, 0, journal->j_blocksize);
BUFFER_TRACE(bh, "marking dirty");
@@ -1010,6 +1014,23 @@ void journal_update_superblock(journal_t *journal, int wait)
goto out;
}
+ if (buffer_write_io_error(bh)) {
+ char b[BDEVNAME_SIZE];
+ /*
+ * Oh, dear. A previous attempt to write the journal
+ * superblock failed. This could happen because the
+ * USB device was yanked out. Or it could happen to
+ * be a transient write error and maybe the block will
+ * be remapped. Nothing we can do but to retry the
+ * write and hope for the best.
+ */
+ printk(KERN_ERR "JBD: previous I/O error detected "
+ "for journal superblock update for %s.\n",
+ journal_dev_name(journal, b));
+ clear_buffer_write_io_error(bh);
+ set_buffer_uptodate(bh);
+ }
+
spin_lock(&journal->j_state_lock);
jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
journal->j_tail, journal->j_tail_sequence, journal->j_errno);
@@ -1021,9 +1042,17 @@ void journal_update_superblock(journal_t *journal, int wait)
BUFFER_TRACE(bh, "marking dirty");
mark_buffer_dirty(bh);
- if (wait)
+ if (wait) {
sync_dirty_buffer(bh);
- else
+ if (buffer_write_io_error(bh)) {
+ char b[BDEVNAME_SIZE];
+ printk(KERN_ERR "JBD: I/O error detected "
+ "when updating journal superblock for %s.\n",
+ journal_dev_name(journal, b));
+ clear_buffer_write_io_error(bh);
+ set_buffer_uptodate(bh);
+ }
+ } else
write_dirty_buffer(bh, WRITE);
out:
@@ -1719,7 +1748,6 @@ static void journal_destroy_journal_head_cache(void)
static struct journal_head *journal_alloc_journal_head(void)
{
struct journal_head *ret;
- static unsigned long last_warning;
#ifdef CONFIG_JBD_DEBUG
atomic_inc(&nr_journal_heads);
@@ -1727,11 +1755,9 @@ static struct journal_head *journal_alloc_journal_head(void)
ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
if (ret == NULL) {
jbd_debug(1, "out of memory for journal_head\n");
- if (time_after(jiffies, last_warning + 5*HZ)) {
- printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
- __func__);
- last_warning = jiffies;
- }
+ printk_ratelimited(KERN_NOTICE "ENOMEM in %s, retrying.\n",
+ __func__);
+
while (ret == NULL) {
yield();
ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 81051dafebf5..5b43e96788e6 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -296,10 +296,10 @@ int journal_skip_recovery(journal_t *journal)
#ifdef CONFIG_JBD_DEBUG
int dropped = info.end_transaction -
be32_to_cpu(journal->j_superblock->s_sequence);
-#endif
jbd_debug(1,
"JBD: ignoring %d transaction%s from the journal.\n",
dropped, (dropped == 1) ? "" : "s");
+#endif
journal->j_transaction_sequence = ++info.end_transaction;
}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5ae71e75a491..846a3f314111 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -293,9 +293,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
jbd_free_handle(handle);
current->journal_info = NULL;
handle = ERR_PTR(err);
- goto out;
}
-out:
return handle;
}
@@ -528,7 +526,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
transaction = handle->h_transaction;
journal = transaction->t_journal;
- jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
+ jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
JBUFFER_TRACE(jh, "entry");
repeat:
@@ -713,7 +711,7 @@ done:
J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
"Possible IO failure.\n");
page = jh2bh(jh)->b_page;
- offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
+ offset = offset_in_page(jh2bh(jh)->b_data);
source = kmap_atomic(page, KM_USER0);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source, KM_USER0);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6571a056e55d..6a79fd0a1a32 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -299,6 +299,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
+ if (unlikely(journal->j_flags & JBD2_UNMOUNT))
+ /*
+ * The journal thread is dead; so starting and
+ * waiting for a commit to finish will cause
+ * us to wait for a _very_ long time.
+ */
+ printk(KERN_ERR "JBD2: %s: "
+ "Waiting for Godot: block %llu\n",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr);
jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid);
ret = 1;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index bc6be8bda1cc..f3ad1598b201 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -26,7 +26,9 @@
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/bitops.h>
#include <trace/events/jbd2.h>
+#include <asm/system.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
@@ -201,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
mapping = jinode->i_vfs_inode->i_mapping;
- jinode->i_flags |= JI_COMMIT_RUNNING;
+ set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
/*
* submit the inode data buffers. We use writepage
@@ -216,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
commit_transaction->t_flushed_data_blocks = 1;
- jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
spin_unlock(&journal->j_list_lock);
@@ -237,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
- jinode->i_flags |= JI_COMMIT_RUNNING;
+ set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) {
@@ -253,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
ret = err;
}
spin_lock(&journal->j_list_lock);
- jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 262419f83d80..c590d155c095 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -42,12 +42,14 @@
#include <linux/log2.h>
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
+#include <linux/bitops.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
#include <asm/uaccess.h>
#include <asm/page.h>
+#include <asm/system.h>
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
@@ -478,7 +480,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
*/
if (!tid_geq(journal->j_commit_request, target)) {
/*
- * We want a new commit: OK, mark the request and wakup the
+ * We want a new commit: OK, mark the request and wakeup the
* commit thread. We do _not_ do the commit ourselves.
*/
@@ -1836,7 +1838,6 @@ size_t journal_tag_bytes(journal_t *journal)
*/
#define JBD2_MAX_SLABS 8
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
-static DECLARE_MUTEX(jbd2_slab_create_sem);
static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
@@ -1857,6 +1858,7 @@ static void jbd2_journal_destroy_slabs(void)
static int jbd2_journal_create_slab(size_t size)
{
+ static DEFINE_MUTEX(jbd2_slab_create_mutex);
int i = order_base_2(size) - 10;
size_t slab_size;
@@ -1868,16 +1870,16 @@ static int jbd2_journal_create_slab(size_t size)
if (unlikely(i < 0))
i = 0;
- down(&jbd2_slab_create_sem);
+ mutex_lock(&jbd2_slab_create_mutex);
if (jbd2_slab[i]) {
- up(&jbd2_slab_create_sem);
+ mutex_unlock(&jbd2_slab_create_mutex);
return 0; /* Already created */
}
slab_size = 1 << (i+10);
jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
slab_size, 0, NULL);
- up(&jbd2_slab_create_sem);
+ mutex_unlock(&jbd2_slab_create_mutex);
if (!jbd2_slab[i]) {
printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
return -ENOMEM;
@@ -2210,7 +2212,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
restart:
spin_lock(&journal->j_list_lock);
/* Is commit writing out inode - we have to wait */
- if (jinode->i_flags & JI_COMMIT_RUNNING) {
+ if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index f3479d6e0a83..6bf0a242613e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -156,6 +156,7 @@ alloc_transaction:
*/
repeat:
read_lock(&journal->j_state_lock);
+ BUG_ON(journal->j_flags & JBD2_UNMOUNT);
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
read_unlock(&journal->j_state_lock);
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a906f538d11c..85c6be2db02f 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -23,7 +23,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
static inline struct jffs2_inode_cache *
first_inode_chain(int *i, struct jffs2_sb_info *c)
{
- for (; *i < INOCACHE_HASHSIZE; (*i)++) {
+ for (; *i < c->inocache_hashsize; (*i)++) {
if (c->inocache_list[*i])
return c->inocache_list[*i];
}
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 617a1e5694c1..de4247021d25 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -103,7 +103,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
spin_unlock(&jffs2_compressor_list_lock);
*datalen = orig_slen;
*cdatalen = orig_dlen;
- compr_ret = this->compress(data_in, output_buf, datalen, cdatalen, NULL);
+ compr_ret = this->compress(data_in, output_buf, datalen, cdatalen);
spin_lock(&jffs2_compressor_list_lock);
this->usecount--;
if (!compr_ret) {
@@ -152,7 +152,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
spin_unlock(&jffs2_compressor_list_lock);
*datalen = orig_slen;
*cdatalen = orig_dlen;
- compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen, NULL);
+ compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen);
spin_lock(&jffs2_compressor_list_lock);
this->usecount--;
if (!compr_ret) {
@@ -220,7 +220,7 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
if (comprtype == this->compr) {
this->usecount++;
spin_unlock(&jffs2_compressor_list_lock);
- ret = this->decompress(cdata_in, data_out, cdatalen, datalen, NULL);
+ ret = this->decompress(cdata_in, data_out, cdatalen, datalen);
spin_lock(&jffs2_compressor_list_lock);
if (ret) {
printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret);
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index e471a9106fd9..13bb7597ab39 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -49,9 +49,9 @@ struct jffs2_compressor {
char *name;
char compr; /* JFFS2_COMPR_XXX */
int (*compress)(unsigned char *data_in, unsigned char *cpage_out,
- uint32_t *srclen, uint32_t *destlen, void *model);
+ uint32_t *srclen, uint32_t *destlen);
int (*decompress)(unsigned char *cdata_in, unsigned char *data_out,
- uint32_t cdatalen, uint32_t datalen, void *model);
+ uint32_t cdatalen, uint32_t datalen);
int usecount;
int disabled; /* if set the compressor won't compress */
unsigned char *compr_buf; /* used by size compr. mode */
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index ed25ae7c98eb..af186ee674d8 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -42,7 +42,7 @@ static int __init alloc_workspace(void)
}
static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen, void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
size_t compress_size;
int ret;
@@ -67,7 +67,7 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
}
static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
- uint32_t srclen, uint32_t destlen, void *model)
+ uint32_t srclen, uint32_t destlen)
{
size_t dl = destlen;
int ret;
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 9696ad9ef5f7..16a5047903a6 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -31,8 +31,7 @@
/* _compress returns the compressed size, -1 if bigger */
static int jffs2_rtime_compress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen,
- void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
short positions[256];
int outpos = 0;
@@ -73,8 +72,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
static int jffs2_rtime_decompress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t srclen, uint32_t destlen,
- void *model)
+ uint32_t srclen, uint32_t destlen)
{
short positions[256];
int outpos = 0;
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index a12b4f763373..9e7cec808c4c 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -298,7 +298,7 @@ static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in,
#if 0
/* _compress returns the compressed size, -1 if bigger */
int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen, void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in,
cpage_out, sourcelen, dstlen);
@@ -306,8 +306,7 @@ int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
#endif
static int jffs2_dynrubin_compress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen,
- void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
int bits[8];
unsigned char histo[256];
@@ -387,8 +386,7 @@ static void rubin_do_decompress(int bit_divider, int *bits,
static int jffs2_rubinmips_decompress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t sourcelen, uint32_t dstlen,
- void *model)
+ uint32_t sourcelen, uint32_t dstlen)
{
rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in,
cpage_out, sourcelen, dstlen);
@@ -397,8 +395,7 @@ static int jffs2_rubinmips_decompress(unsigned char *data_in,
static int jffs2_dynrubin_decompress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t sourcelen, uint32_t dstlen,
- void *model)
+ uint32_t sourcelen, uint32_t dstlen)
{
int bits[8];
int c;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 97fc45de6f81..fd05a0b9431d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -68,8 +68,7 @@ static void free_workspaces(void)
static int jffs2_zlib_compress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t *sourcelen, uint32_t *dstlen,
- void *model)
+ uint32_t *sourcelen, uint32_t *dstlen)
{
int ret;
@@ -136,8 +135,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
static int jffs2_zlib_decompress(unsigned char *data_in,
unsigned char *cpage_out,
- uint32_t srclen, uint32_t destlen,
- void *model)
+ uint32_t srclen, uint32_t destlen)
{
int ret;
int wbits = MAX_WBITS;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 79121aa5858b..92978658ed18 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -367,7 +367,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
}
/* We use f->target field to store the target path. */
- f->target = kmalloc(targetlen + 1, GFP_KERNEL);
+ f->target = kmemdup(target, targetlen + 1, GFP_KERNEL);
if (!f->target) {
printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
mutex_unlock(&f->sem);
@@ -376,7 +376,6 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
goto fail;
}
- memcpy(f->target, target, targetlen + 1);
D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target));
/* No data here. Only a metadata node, which will be
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index abac961f617b..e513f1913c15 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -151,7 +151,7 @@ int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
}
/* Be nice */
- yield();
+ cond_resched();
mutex_lock(&c->erase_free_sem);
spin_lock(&c->erase_completion_lock);
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d9beb06e6fca..e896e67767eb 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -474,6 +474,25 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
return inode;
}
+static int calculate_inocache_hashsize(uint32_t flash_size)
+{
+ /*
+ * Pick an inocache hash size based on the size of the medium.
+ * Count how many megabytes we're dealing with, apply a hashsize twice
+ * that size, rounded down to a multiple of 64, and clamp the result
+ * to the range [INOCACHE_HASHSIZE_MIN, INOCACHE_HASHSIZE_MAX].
+ */
+
+ int size_mb = flash_size / 1024 / 1024;
+ int hashsize = (size_mb * 2) & ~0x3f;
+
+ if (hashsize < INOCACHE_HASHSIZE_MIN)
+ return INOCACHE_HASHSIZE_MIN;
+ if (hashsize > INOCACHE_HASHSIZE_MAX)
+ return INOCACHE_HASHSIZE_MAX;
+
+ return hashsize;
+}
int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
{
@@ -520,7 +539,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
if (ret)
return ret;
- c->inocache_list = kcalloc(INOCACHE_HASHSIZE, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
+ c->inocache_hashsize = calculate_inocache_hashsize(c->flash_size);
+ c->inocache_list = kcalloc(c->inocache_hashsize, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
if (!c->inocache_list) {
ret = -ENOMEM;
goto out_wbuf;
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 846a79452497..31dce611337c 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -219,13 +219,14 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
if (!list_empty(&c->erase_complete_list) ||
!list_empty(&c->erase_pending_list)) {
spin_unlock(&c->erase_completion_lock);
+ mutex_unlock(&c->alloc_sem);
D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
- if (jffs2_erase_pending_blocks(c, 1)) {
- mutex_unlock(&c->alloc_sem);
+ if (jffs2_erase_pending_blocks(c, 1))
return 0;
- }
+
D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
spin_lock(&c->erase_completion_lock);
+ mutex_lock(&c->alloc_sem);
}
/* First, work out which block we're garbage-collecting */
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 6784bc89add1..f864005de64c 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -100,6 +100,7 @@ struct jffs2_sb_info {
wait_queue_head_t erase_wait; /* For waiting for erases to complete */
wait_queue_head_t inocache_wq;
+ int inocache_hashsize;
struct jffs2_inode_cache **inocache_list;
spinlock_t inocache_lock;
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index af02bd138469..5e03233c2363 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -420,7 +420,7 @@ struct jffs2_inode_cache *jffs2_get_ino_cache(struct jffs2_sb_info *c, uint32_t
{
struct jffs2_inode_cache *ret;
- ret = c->inocache_list[ino % INOCACHE_HASHSIZE];
+ ret = c->inocache_list[ino % c->inocache_hashsize];
while (ret && ret->ino < ino) {
ret = ret->next;
}
@@ -441,7 +441,7 @@ void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new
dbg_inocache("add %p (ino #%u)\n", new, new->ino);
- prev = &c->inocache_list[new->ino % INOCACHE_HASHSIZE];
+ prev = &c->inocache_list[new->ino % c->inocache_hashsize];
while ((*prev) && (*prev)->ino < new->ino) {
prev = &(*prev)->next;
@@ -462,7 +462,7 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
dbg_inocache("del %p (ino #%u)\n", old, old->ino);
spin_lock(&c->inocache_lock);
- prev = &c->inocache_list[old->ino % INOCACHE_HASHSIZE];
+ prev = &c->inocache_list[old->ino % c->inocache_hashsize];
while ((*prev) && (*prev)->ino < old->ino) {
prev = &(*prev)->next;
@@ -487,7 +487,7 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c)
int i;
struct jffs2_inode_cache *this, *next;
- for (i=0; i<INOCACHE_HASHSIZE; i++) {
+ for (i=0; i < c->inocache_hashsize; i++) {
this = c->inocache_list[i];
while (this) {
next = this->next;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 523a91691052..5a53d9bdb2b5 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -199,7 +199,8 @@ struct jffs2_inode_cache {
#define RAWNODE_CLASS_XATTR_DATUM 1
#define RAWNODE_CLASS_XATTR_REF 2
-#define INOCACHE_HASHSIZE 128
+#define INOCACHE_HASHSIZE_MIN 128
+#define INOCACHE_HASHSIZE_MAX 1024
#define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size)
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 46f870d1cc36..b632dddcb482 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -20,7 +20,7 @@
#include "summary.h"
#include "debug.h"
-#define DEFAULT_EMPTY_SCAN_SIZE 1024
+#define DEFAULT_EMPTY_SCAN_SIZE 256
#define noisy_printk(noise, args...) do { \
if (*(noise)) { \
@@ -435,7 +435,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) {
struct jffs2_unknown_node *node;
struct jffs2_unknown_node crcnode;
- uint32_t ofs, prevofs;
+ uint32_t ofs, prevofs, max_ofs;
uint32_t hdr_crc, buf_ofs, buf_len;
int err;
int noise = 0;
@@ -550,12 +550,12 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
/* We temporarily use 'ofs' as a pointer into the buffer/jeb */
ofs = 0;
-
- /* Scan only 4KiB of 0xFF before declaring it's empty */
- while(ofs < EMPTY_SCAN_SIZE(c->sector_size) && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
+ max_ofs = EMPTY_SCAN_SIZE(c->sector_size);
+ /* Scan only EMPTY_SCAN_SIZE of 0xFF before declaring it's empty */
+ while(ofs < max_ofs && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
ofs += 4;
- if (ofs == EMPTY_SCAN_SIZE(c->sector_size)) {
+ if (ofs == max_ofs) {
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
if (jffs2_cleanmarker_oob(c)) {
/* scan oob, take care of cleanmarker */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d1ae5dfc22b9..c86041b866a4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -179,12 +179,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
return ret;
}
-static int jffs2_get_sb(struct file_system_type *fs_type,
+static struct dentry *jffs2_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super,
- mnt);
+ return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super);
}
static void jffs2_put_super (struct super_block *sb)
@@ -229,7 +228,7 @@ static void jffs2_kill_sb(struct super_block *sb)
static struct file_system_type jffs2_fs_type = {
.owner = THIS_MODULE,
.name = "jffs2",
- .get_sb = jffs2_get_sb,
+ .mount = jffs2_mount,
.kill_sb = jffs2_kill_sb,
};
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 68eee2bf629e..0669fc1cc3bf 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -583,11 +583,10 @@ static int jfs_unfreeze(struct super_block *sb)
return 0;
}
-static int jfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
}
static int jfs_sync_fs(struct super_block *sb, int wait)
@@ -770,7 +769,7 @@ static const struct export_operations jfs_export_operations = {
static struct file_system_type jfs_fs_type = {
.owner = THIS_MODULE,
.name = "jfs",
- .get_sb = jfs_get_sb,
+ .mount = jfs_do_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/libfs.c b/fs/libfs.c
index 304a5132ca27..a3accdf528ad 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = {
* Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
* will never be mountable)
*/
-int get_sb_pseudo(struct file_system_type *fs_type, char *name,
- const struct super_operations *ops, unsigned long magic,
- struct vfsmount *mnt)
+struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
+ const struct super_operations *ops, unsigned long magic)
{
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
struct dentry *dentry;
@@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
struct qstr d_name = {.name = name, .len = strlen(name)};
if (IS_ERR(s))
- return PTR_ERR(s);
+ return ERR_CAST(s);
s->s_flags = MS_NOUSER;
s->s_maxbytes = MAX_LFS_FILESIZE;
@@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
d_instantiate(dentry, root);
s->s_root = dentry;
s->s_flags |= MS_ACTIVE;
- simple_set_mnt(mnt, s);
- return 0;
+ return dget(s->s_root);
Enomem:
deactivate_locked_super(s);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
@@ -951,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek);
EXPORT_SYMBOL(dcache_dir_open);
EXPORT_SYMBOL(dcache_readdir);
EXPORT_SYMBOL(generic_read_dir);
-EXPORT_SYMBOL(get_sb_pseudo);
+EXPORT_SYMBOL(mount_pseudo);
EXPORT_SYMBOL(simple_write_begin);
EXPORT_SYMBOL(simple_write_end);
EXPORT_SYMBOL(simple_dir_inode_operations);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b13aabc12298..abfff9d7979d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -22,7 +22,6 @@
#include <linux/in.h>
#include <linux/uio.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/mutex.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -130,15 +129,6 @@ lockd(void *vrqstp)
dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
- /*
- * FIXME: it would be nice if lockd didn't spend its entire life
- * running under the BKL. At the very least, it would be good to
- * have someone clarify what it's intended to protect here. I've
- * seen some handwavy posts about posix locking needing to be
- * done under the BKL, but it's far from clear.
- */
- lock_kernel();
-
if (!nlm_timeout)
nlm_timeout = LOCKD_DFLT_TIMEO;
nlmsvc_timeout = nlm_timeout * HZ;
@@ -195,7 +185,6 @@ lockd(void *vrqstp)
if (nlmsvc_ops)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
- unlock_kernel();
return 0;
}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 6f1ef000975a..c462d346acbd 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -700,14 +700,16 @@ nlmsvc_notify_blocked(struct file_lock *fl)
struct nlm_block *block;
dprintk("lockd: VFS unblock notification for block %p\n", fl);
+ spin_lock(&nlm_blocked_lock);
list_for_each_entry(block, &nlm_blocked, b_list) {
if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
- nlmsvc_insert_block(block, 0);
+ nlmsvc_insert_block_locked(block, 0);
+ spin_unlock(&nlm_blocked_lock);
svc_wake_up(block->b_daemon);
return;
}
}
-
+ spin_unlock(&nlm_blocked_lock);
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d0ef94cfb3da..1ca0679c80bf 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -170,6 +170,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
again:
file->f_locks = 0;
+ lock_flocks(); /* protects i_flock list */
for (fl = inode->i_flock; fl; fl = fl->fl_next) {
if (fl->fl_lmops != &nlmsvc_lock_operations)
continue;
@@ -181,6 +182,7 @@ again:
if (match(lockhost, host)) {
struct file_lock lock = *fl;
+ unlock_flocks();
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
@@ -192,6 +194,7 @@ again:
goto again;
}
}
+ unlock_flocks();
return 0;
}
@@ -226,10 +229,14 @@ nlm_file_inuse(struct nlm_file *file)
if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1;
+ lock_flocks();
for (fl = inode->i_flock; fl; fl = fl->fl_next) {
- if (fl->fl_lmops == &nlmsvc_lock_operations)
+ if (fl->fl_lmops == &nlmsvc_lock_operations) {
+ unlock_flocks();
return 1;
+ }
}
+ unlock_flocks();
file->f_locks = 0;
return 0;
}
diff --git a/fs/locks.c b/fs/locks.c
index 4de3a2666810..0e62dd35d088 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -142,6 +142,7 @@ int lease_break_time = 45;
static LIST_HEAD(file_lock_list);
static LIST_HEAD(blocked_list);
+static DEFINE_SPINLOCK(file_lock_lock);
/*
* Protects the two list heads above, plus the inode->i_flock list
@@ -149,23 +150,24 @@ static LIST_HEAD(blocked_list);
*/
void lock_flocks(void)
{
- lock_kernel();
+ spin_lock(&file_lock_lock);
}
EXPORT_SYMBOL_GPL(lock_flocks);
void unlock_flocks(void)
{
- unlock_kernel();
+ spin_unlock(&file_lock_lock);
}
EXPORT_SYMBOL_GPL(unlock_flocks);
static struct kmem_cache *filelock_cache __read_mostly;
/* Allocate an empty lock structure. */
-static struct file_lock *locks_alloc_lock(void)
+struct file_lock *locks_alloc_lock(void)
{
return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
}
+EXPORT_SYMBOL_GPL(locks_alloc_lock);
void locks_release_private(struct file_lock *fl)
{
@@ -184,7 +186,7 @@ void locks_release_private(struct file_lock *fl)
EXPORT_SYMBOL_GPL(locks_release_private);
/* Free a lock which is not in use. */
-static void locks_free_lock(struct file_lock *fl)
+void locks_free_lock(struct file_lock *fl)
{
BUG_ON(waitqueue_active(&fl->fl_wait));
BUG_ON(!list_empty(&fl->fl_block));
@@ -193,6 +195,7 @@ static void locks_free_lock(struct file_lock *fl)
locks_release_private(fl);
kmem_cache_free(filelock_cache, fl);
}
+EXPORT_SYMBOL(locks_free_lock);
void locks_init_lock(struct file_lock *fl)
{
@@ -232,11 +235,8 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
fl->fl_ops->fl_copy_lock(new, fl);
new->fl_ops = fl->fl_ops;
}
- if (fl->fl_lmops) {
- if (fl->fl_lmops->fl_copy_lock)
- fl->fl_lmops->fl_copy_lock(new, fl);
+ if (fl->fl_lmops)
new->fl_lmops = fl->fl_lmops;
- }
}
/*
@@ -1365,31 +1365,27 @@ int fcntl_getlease(struct file *filp)
int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
{
struct file_lock *fl, **before, **my_before = NULL, *lease;
- struct file_lock *new_fl = NULL;
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
int error, rdlease_count = 0, wrlease_count = 0;
+ lease = *flp;
+
+ error = -EACCES;
if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
- return -EACCES;
+ goto out;
+ error = -EINVAL;
if (!S_ISREG(inode->i_mode))
- return -EINVAL;
+ goto out;
error = security_file_lock(filp, arg);
if (error)
- return error;
+ goto out;
time_out_leases(inode);
BUG_ON(!(*flp)->fl_lmops->fl_break);
- lease = *flp;
-
if (arg != F_UNLCK) {
- error = -ENOMEM;
- new_fl = locks_alloc_lock();
- if (new_fl == NULL)
- goto out;
-
error = -EAGAIN;
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
goto out;
@@ -1429,12 +1425,12 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
goto out;
if (my_before != NULL) {
- *flp = *my_before;
error = lease->fl_lmops->fl_change(my_before, arg);
+ if (!error)
+ *flp = *my_before;
goto out;
}
- error = 0;
if (arg == F_UNLCK)
goto out;
@@ -1442,15 +1438,10 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
if (!leases_enable)
goto out;
- locks_copy_lock(new_fl, lease);
- locks_insert_lock(before, new_fl);
-
- *flp = new_fl;
+ locks_insert_lock(before, lease);
return 0;
out:
- if (new_fl != NULL)
- locks_free_lock(new_fl);
return error;
}
EXPORT_SYMBOL(generic_setlease);
@@ -1502,6 +1493,59 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
}
EXPORT_SYMBOL_GPL(vfs_setlease);
+static int do_fcntl_delete_lease(struct file *filp)
+{
+ struct file_lock fl, *flp = &fl;
+
+ lease_init(filp, F_UNLCK, flp);
+
+ return vfs_setlease(filp, F_UNLCK, &flp);
+}
+
+static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
+{
+ struct file_lock *fl, *ret;
+ struct fasync_struct *new;
+ int error;
+
+ fl = lease_alloc(filp, arg);
+ if (IS_ERR(fl))
+ return PTR_ERR(fl);
+
+ new = fasync_alloc();
+ if (!new) {
+ locks_free_lock(fl);
+ return -ENOMEM;
+ }
+ ret = fl;
+ lock_flocks();
+ error = __vfs_setlease(filp, arg, &ret);
+ if (error) {
+ unlock_flocks();
+ locks_free_lock(fl);
+ goto out_free_fasync;
+ }
+ if (ret != fl)
+ locks_free_lock(fl);
+
+ /*
+ * fasync_insert_entry() returns the old entry if any.
+ * If there was no old entry, then it used 'new' and
+ * inserted it into the fasync list. Clear new so that
+ * we don't release it here.
+ */
+ if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
+ new = NULL;
+
+ error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+ unlock_flocks();
+
+out_free_fasync:
+ if (new)
+ fasync_free(new);
+ return error;
+}
+
/**
* fcntl_setlease - sets a lease on an open file
* @fd: open file descriptor
@@ -1514,34 +1558,9 @@ EXPORT_SYMBOL_GPL(vfs_setlease);
*/
int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
{
- struct file_lock fl, *flp = &fl;
- struct inode *inode = filp->f_path.dentry->d_inode;
- int error;
-
- locks_init_lock(&fl);
- error = lease_init(filp, arg, &fl);
- if (error)
- return error;
-
- lock_flocks();
-
- error = __vfs_setlease(filp, arg, &flp);
- if (error || arg == F_UNLCK)
- goto out_unlock;
-
- error = fasync_helper(fd, filp, 1, &flp->fl_fasync);
- if (error < 0) {
- /* remove lease just inserted by setlease */
- flp->fl_type = F_UNLCK | F_INPROGRESS;
- flp->fl_break_time = jiffies - 10;
- time_out_leases(inode);
- goto out_unlock;
- }
-
- error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-out_unlock:
- unlock_flocks();
- return error;
+ if (arg == F_UNLCK)
+ return do_fcntl_delete_lease(filp);
+ return do_fcntl_add_lease(fd, filp, arg);
}
/**
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9bd2ce2a3040..92ca6fbe09bd 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -298,9 +298,9 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
return sync_request(page, bdev, WRITE);
}
-static void bdev_put_device(struct super_block *sb)
+static void bdev_put_device(struct logfs_super *s)
{
- close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
+ close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE);
}
static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -320,8 +320,8 @@ static const struct logfs_device_ops bd_devops = {
.put_device = bdev_put_device,
};
-int logfs_get_sb_bdev(struct file_system_type *type, int flags,
- const char *devname, struct vfsmount *mnt)
+int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
+ const char *devname)
{
struct block_device *bdev;
@@ -332,8 +332,11 @@ int logfs_get_sb_bdev(struct file_system_type *type, int flags,
if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
int mtdnr = MINOR(bdev->bd_dev);
close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
- return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+ return logfs_get_sb_mtd(p, mtdnr);
}
- return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt);
+ p->s_bdev = bdev;
+ p->s_mtd = NULL;
+ p->s_devops = &bd_devops;
+ return 0;
}
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index a85d47d13e4b..7466e9dcc8c5 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -230,9 +230,9 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
__mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
}
-static void mtd_put_device(struct super_block *sb)
+static void mtd_put_device(struct logfs_super *s)
{
- put_mtd_device(logfs_super(sb)->s_mtd);
+ put_mtd_device(s->s_mtd);
}
static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
@@ -265,14 +265,14 @@ static const struct logfs_device_ops mtd_devops = {
.put_device = mtd_put_device,
};
-int logfs_get_sb_mtd(struct file_system_type *type, int flags,
- int mtdnr, struct vfsmount *mnt)
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
{
- struct mtd_info *mtd;
- const struct logfs_device_ops *devops = &mtd_devops;
-
- mtd = get_mtd_device(NULL, mtdnr);
+ struct mtd_info *mtd = get_mtd_device(NULL, mtdnr);
if (IS_ERR(mtd))
return PTR_ERR(mtd);
- return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt);
+
+ s->s_bdev = NULL;
+ s->s_mtd = mtd;
+ s->s_devops = &mtd_devops;
+ return 0;
}
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b8786264d243..57afd4a6fabb 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -136,6 +136,7 @@ struct logfs_area_ops {
int (*erase_segment)(struct logfs_area *area);
};
+struct logfs_super; /* forward */
/**
* struct logfs_device_ops - device access operations
*
@@ -156,7 +157,7 @@ struct logfs_device_ops {
int ensure_write);
int (*can_write_buf)(struct super_block *sb, u64 ofs);
void (*sync)(struct super_block *sb);
- void (*put_device)(struct super_block *sb);
+ void (*put_device)(struct logfs_super *s);
};
/**
@@ -471,11 +472,13 @@ void logfs_compr_exit(void);
/* dev_bdev.c */
#ifdef CONFIG_BLOCK
-int logfs_get_sb_bdev(struct file_system_type *type, int flags,
- const char *devname, struct vfsmount *mnt);
+int logfs_get_sb_bdev(struct logfs_super *s,
+ struct file_system_type *type,
+ const char *devname);
#else
-static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
- const char *devname, struct vfsmount *mnt)
+static inline int logfs_get_sb_bdev(struct logfs_super *s,
+ struct file_system_type *type,
+ const char *devname)
{
return -ENODEV;
}
@@ -483,11 +486,9 @@ static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
/* dev_mtd.c */
#ifdef CONFIG_MTD
-int logfs_get_sb_mtd(struct file_system_type *type, int flags,
- int mtdnr, struct vfsmount *mnt);
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
#else
-static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags,
- int mtdnr, struct vfsmount *mnt)
+static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
{
return -ENODEV;
}
@@ -619,9 +620,6 @@ void emergency_read_end(struct page *page);
void logfs_crash_dump(struct super_block *sb);
void *memchr_inv(const void *s, int c, size_t n);
int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
-int logfs_get_sb_device(struct file_system_type *type, int flags,
- struct mtd_info *mtd, struct block_device *bdev,
- const struct logfs_device_ops *devops, struct vfsmount *mnt);
int logfs_check_ds(struct logfs_disk_super *ds);
int logfs_write_sb(struct super_block *sb);
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5336155c5d81..33435e4b14d2 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -325,7 +325,7 @@ static int logfs_make_writeable(struct super_block *sb)
return 0;
}
-static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
+static int logfs_get_sb_final(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
struct inode *rootdir;
@@ -356,7 +356,6 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
}
log_super("LogFS: Finished mounting\n");
- simple_set_mnt(mnt, sb);
return 0;
fail:
@@ -529,43 +528,37 @@ static void logfs_kill_sb(struct super_block *sb)
logfs_cleanup_rw(sb);
if (super->s_erase_page)
__free_page(super->s_erase_page);
- super->s_devops->put_device(sb);
+ super->s_devops->put_device(super);
logfs_mempool_destroy(super->s_btree_pool);
logfs_mempool_destroy(super->s_alias_pool);
kfree(super);
log_super("LogFS: Finished unmounting\n");
}
-int logfs_get_sb_device(struct file_system_type *type, int flags,
- struct mtd_info *mtd, struct block_device *bdev,
- const struct logfs_device_ops *devops, struct vfsmount *mnt)
+static struct dentry *logfs_get_sb_device(struct logfs_super *super,
+ struct file_system_type *type, int flags)
{
- struct logfs_super *super;
struct super_block *sb;
int err = -ENOMEM;
static int mount_count;
log_super("LogFS: Start mount %x\n", mount_count++);
- super = kzalloc(sizeof(*super), GFP_KERNEL);
- if (!super)
- goto err0;
- super->s_mtd = mtd;
- super->s_bdev = bdev;
err = -EINVAL;
sb = sget(type, logfs_sb_test, logfs_sb_set, super);
- if (IS_ERR(sb))
- goto err0;
+ if (IS_ERR(sb)) {
+ super->s_devops->put_device(super);
+ kfree(super);
+ return ERR_CAST(sb);
+ }
if (sb->s_root) {
/* Device is already in use */
- err = 0;
- simple_set_mnt(mnt, sb);
- goto err0;
+ super->s_devops->put_device(super);
+ kfree(super);
+ return dget(sb->s_root);
}
- super->s_devops = devops;
-
/*
* sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache
* only covers 16TB and the upper 8TB are used for indirect blocks.
@@ -581,10 +574,12 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
goto err1;
sb->s_flags |= MS_ACTIVE;
- err = logfs_get_sb_final(sb, mnt);
- if (err)
+ err = logfs_get_sb_final(sb);
+ if (err) {
deactivate_locked_super(sb);
- return err;
+ return ERR_PTR(err);
+ }
+ return dget(sb->s_root);
err1:
/* no ->s_root, no ->put_super() */
@@ -592,37 +587,45 @@ err1:
iput(super->s_segfile_inode);
iput(super->s_mapping_inode);
deactivate_locked_super(sb);
- return err;
-err0:
- kfree(super);
- //devops->put_device(sb);
- return err;
+ return ERR_PTR(err);
}
-static int logfs_get_sb(struct file_system_type *type, int flags,
- const char *devname, void *data, struct vfsmount *mnt)
+static struct dentry *logfs_mount(struct file_system_type *type, int flags,
+ const char *devname, void *data)
{
ulong mtdnr;
+ struct logfs_super *super;
+ int err;
- if (!devname)
- return logfs_get_sb_bdev(type, flags, devname, mnt);
- if (strncmp(devname, "mtd", 3))
- return logfs_get_sb_bdev(type, flags, devname, mnt);
+ super = kzalloc(sizeof(*super), GFP_KERNEL);
+ if (!super)
+ return ERR_PTR(-ENOMEM);
- {
+ if (!devname)
+ err = logfs_get_sb_bdev(super, type, devname);
+ else if (strncmp(devname, "mtd", 3))
+ err = logfs_get_sb_bdev(super, type, devname);
+ else {
char *garbage;
mtdnr = simple_strtoul(devname+3, &garbage, 0);
if (*garbage)
- return -EINVAL;
+ err = -EINVAL;
+ else
+ err = logfs_get_sb_mtd(super, mtdnr);
+ }
+
+ if (err) {
+ kfree(super);
+ return ERR_PTR(err);
}
- return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+ return logfs_get_sb_device(super, type, flags);
}
static struct file_system_type logfs_fs_type = {
.owner = THIS_MODULE,
.name = "logfs",
- .get_sb = logfs_get_sb,
+ .mount = logfs_mount,
.kill_sb = logfs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e39d6bf2e8fb..fb2020858a34 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -614,17 +614,16 @@ void minix_truncate(struct inode * inode)
V2_minix_truncate(inode);
}
-static int minix_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *minix_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super);
}
static struct file_system_type minix_fs_type = {
.owner = THIS_MODULE,
.name = "minix",
- .get_sb = minix_get_sb,
+ .mount = minix_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/namei.c b/fs/namei.c
index f7dbc06857ab..5362af9b7372 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1574,6 +1574,7 @@ static struct file *finish_open(struct nameidata *nd,
*/
if (will_truncate)
mnt_drop_write(nd->path.mnt);
+ path_put(&nd->path);
return filp;
exit:
@@ -1675,6 +1676,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
}
filp = nameidata_to_filp(nd);
mnt_drop_write(nd->path.mnt);
+ path_put(&nd->path);
if (!IS_ERR(filp)) {
error = ima_file_check(filp, acc_mode);
if (error) {
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 985fabb26aca..d290545aa0c4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -1020,16 +1020,16 @@ out:
return result;
}
-static int ncp_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ncp_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, ncp_fill_super);
}
static struct file_system_type ncp_fs_type = {
.owner = THIS_MODULE,
.name = "ncpfs",
- .get_sb = ncp_get_sb,
+ .mount = ncp_mount,
.kill_sb = kill_anon_super,
.fs_flags = FS_BINARY_MOUNTDATA,
};
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index fd667652c502..ba306658a6db 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,7 +1,6 @@
config NFS_FS
tristate "NFS client support"
depends on INET && FILE_LOCKING
- depends on BKL # fix as soon as lockd is done
select LOCKD
select SUNRPC
select NFS_ACL_SUPPORT if NFS_V3_ACL
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 064a80961677..84d3c8b90206 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -873,7 +873,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
- if (dreq->l_ctx != NULL)
+ if (dreq->l_ctx == NULL)
goto out_release;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e756075637b0..60677f9f1311 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -884,6 +884,5 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
file->f_path.dentry->d_parent->d_name.name,
file->f_path.dentry->d_name.name, arg);
-
return -EINVAL;
}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dec47ed8b6b9..4e2d9b6b1380 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -123,7 +123,7 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
size_t desclen = typelen + namelen + 2;
*desc = kmalloc(desclen, GFP_KERNEL);
- if (!desc)
+ if (!*desc)
return -ENOMEM;
cp = *desc;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 32c8758c99fd..0f24cdf2cb13 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -429,7 +429,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
* returned NFS4ERR_DELAY as per Section 2.10.6.2
* of RFC5661.
*/
- dprintk("%s: slot=%ld seq=%d: Operation in progress\n",
+ dprintk("%s: slot=%td seq=%d: Operation in progress\n",
__func__,
res->sr_slot - res->sr_session->fc_slot_table.slots,
res->sr_slot->seq_nr);
@@ -573,7 +573,7 @@ int nfs4_setup_sequence(const struct nfs_server *server,
goto out;
}
- dprintk("--> %s clp %p session %p sr_slot %ld\n",
+ dprintk("--> %s clp %p session %p sr_slot %td\n",
__func__, session->clp, session, res->sr_slot ?
res->sr_slot - session->fc_slot_table.slots : -1);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 919490232e17..137b549e63db 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -65,6 +65,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
if (req == NULL)
return ERR_PTR(-ENOMEM);
+ /* get lock context early so we can deal with alloc failures */
+ req->wb_lock_context = nfs_get_lock_context(ctx);
+ if (req->wb_lock_context == NULL) {
+ nfs_page_free(req);
+ return ERR_PTR(-ENOMEM);
+ }
+
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
@@ -79,7 +86,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
req->wb_pgbase = offset;
req->wb_bytes = count;
req->wb_context = get_nfs_open_context(ctx);
- req->wb_lock_context = nfs_get_lock_context(ctx);
kref_init(&req->wb_kref);
return req;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3600ec700d58..0a42e8f4adcb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -260,8 +260,8 @@ static int nfs_statfs(struct dentry *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
static int nfs_show_stats(struct seq_file *, struct vfsmount *);
static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_xdev_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
static void nfs_put_super(struct super_block *);
static void nfs_kill_super(struct super_block *);
static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
@@ -277,7 +277,7 @@ static struct file_system_type nfs_fs_type = {
struct file_system_type nfs_xdev_fs_type = {
.owner = THIS_MODULE,
.name = "nfs",
- .get_sb = nfs_xdev_get_sb,
+ .mount = nfs_xdev_mount,
.kill_sb = nfs_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -302,14 +302,14 @@ static int nfs4_try_mount(int flags, const char *dev_name,
struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_remote_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
+static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
static int nfs4_referral_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
static void nfs4_kill_super(struct super_block *sb);
static struct file_system_type nfs4_fs_type = {
@@ -323,7 +323,7 @@ static struct file_system_type nfs4_fs_type = {
static struct file_system_type nfs4_remote_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs4_remote_get_sb,
+ .mount = nfs4_remote_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -331,7 +331,7 @@ static struct file_system_type nfs4_remote_fs_type = {
struct file_system_type nfs4_xdev_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs4_xdev_get_sb,
+ .mount = nfs4_xdev_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -339,7 +339,7 @@ struct file_system_type nfs4_xdev_fs_type = {
static struct file_system_type nfs4_remote_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs4_remote_referral_get_sb,
+ .mount = nfs4_remote_referral_mount,
.kill_sb = nfs4_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
@@ -2397,9 +2397,9 @@ static void nfs_kill_super(struct super_block *s)
/*
* Clone an NFS2/3 server record on xdev traversal (FSID-change)
*/
-static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *
+nfs_xdev_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
struct super_block *s;
@@ -2411,7 +2411,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
};
int error;
- dprintk("--> nfs_xdev_get_sb()\n");
+ dprintk("--> nfs_xdev_mount()\n");
/* create a new volume representation */
server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -2458,28 +2458,26 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
}
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
/* clone any lsm security options from the parent to the new sb */
security_sb_clone_mnt_opts(data->sb, s);
- dprintk("<-- nfs_xdev_get_sb() = 0\n");
- return 0;
+ dprintk("<-- nfs_xdev_mount() = 0\n");
+ return mntroot;
out_err_nosb:
nfs_free_server(server);
out_err_noserver:
- dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
- return error;
+ dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
+ return ERR_PTR(error);
error_splat_super:
if (server && !s->s_root)
bdi_unregister(&server->backing_dev_info);
error_splat_bdi:
deactivate_locked_super(s);
- dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
- return error;
+ dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
+ return ERR_PTR(error);
}
#ifdef CONFIG_NFS_V4
@@ -2649,8 +2647,9 @@ out_no_address:
/*
* Get the superblock for the NFS4 root partition
*/
-static int nfs4_remote_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+static struct dentry *
+nfs4_remote_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct nfs_parsed_mount_data *data = raw_data;
struct super_block *s;
@@ -2714,15 +2713,16 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
goto error_splat_root;
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
- error = 0;
+
+ security_free_mnt_opts(&data->lsm_opts);
+ nfs_free_fhandle(mntfh);
+ return mntroot;
out:
security_free_mnt_opts(&data->lsm_opts);
out_free_fh:
nfs_free_fhandle(mntfh);
- return error;
+ return ERR_PTR(error);
out_free:
nfs_free_server(server);
@@ -2968,9 +2968,9 @@ static void nfs4_kill_super(struct super_block *sb)
/*
* Clone an NFS4 server record on xdev traversal (FSID-change)
*/
-static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *
+nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
struct super_block *s;
@@ -2982,7 +2982,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
};
int error;
- dprintk("--> nfs4_xdev_get_sb()\n");
+ dprintk("--> nfs4_xdev_mount()\n");
/* create a new volume representation */
server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -3029,32 +3029,30 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
}
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
security_sb_clone_mnt_opts(data->sb, s);
- dprintk("<-- nfs4_xdev_get_sb() = 0\n");
- return 0;
+ dprintk("<-- nfs4_xdev_mount() = 0\n");
+ return mntroot;
out_err_nosb:
nfs_free_server(server);
out_err_noserver:
- dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
- return error;
+ dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
+ return ERR_PTR(error);
error_splat_super:
if (server && !s->s_root)
bdi_unregister(&server->backing_dev_info);
error_splat_bdi:
deactivate_locked_super(s);
- dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
- return error;
+ dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
+ return ERR_PTR(error);
}
-static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static struct dentry *
+nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
struct nfs_clone_mount *data = raw_data;
struct super_block *s;
@@ -3118,14 +3116,12 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
}
s->s_flags |= MS_ACTIVE;
- mnt->mnt_sb = s;
- mnt->mnt_root = mntroot;
security_sb_clone_mnt_opts(data->sb, s);
nfs_free_fhandle(mntfh);
dprintk("<-- nfs4_referral_get_sb() = 0\n");
- return 0;
+ return mntroot;
out_err_nosb:
nfs_free_server(server);
@@ -3133,7 +3129,7 @@ out_err_noserver:
nfs_free_fhandle(mntfh);
out_err_nofh:
dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
- return error;
+ return ERR_PTR(error);
error_splat_super:
if (server && !s->s_root)
@@ -3142,7 +3138,7 @@ error_splat_bdi:
deactivate_locked_super(s);
nfs_free_fhandle(mntfh);
dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
- return error;
+ return ERR_PTR(error);
}
/*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9a16bad5d2ea..7bdec8531400 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -444,9 +444,9 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
/* set up nfs_renamedata */
data->old_dir = old_dir;
- atomic_inc(&old_dir->i_count);
+ ihold(old_dir);
data->new_dir = new_dir;
- atomic_inc(&new_dir->i_count);
+ ihold(new_dir);
data->old_dentry = dget(old_dentry);
data->new_dentry = dget(new_dentry);
nfs_fattr_init(&data->old_fattr);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 31a78fce4732..18b3e8975fe0 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -2,7 +2,6 @@ config NFSD
tristate "NFS server support"
depends on INET
depends on FILE_LOCKING
- depends on BKL # fix as soon as lockd is done
select LOCKD
select SUNRPC
select EXPORTFS
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9019e8ec9dc8..ad2bfa68d534 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -673,16 +673,17 @@ static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
spin_unlock(&clp->cl_lock);
}
-static void nfsd4_register_conn(struct nfsd4_conn *conn)
+static int nfsd4_register_conn(struct nfsd4_conn *conn)
{
conn->cn_xpt_user.callback = nfsd4_conn_lost;
- register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
+ return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
}
static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
{
struct nfsd4_conn *conn;
u32 flags = NFS4_CDFC4_FORE;
+ int ret;
if (ses->se_flags & SESSION4_BACK_CHAN)
flags |= NFS4_CDFC4_BACK;
@@ -690,7 +691,10 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
if (!conn)
return nfserr_jukebox;
nfsd4_hash_conn(conn, ses);
- nfsd4_register_conn(conn);
+ ret = nfsd4_register_conn(conn);
+ if (ret)
+ /* oops; xprt is already down: */
+ nfsd4_conn_lost(&conn->cn_xpt_user);
return nfs_ok;
}
@@ -1644,6 +1648,7 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
{
struct nfs4_client *clp = ses->se_client;
struct nfsd4_conn *c;
+ int ret;
spin_lock(&clp->cl_lock);
c = __nfsd4_find_conn(new->cn_xprt, ses);
@@ -1654,7 +1659,10 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
}
__nfsd4_hash_conn(new, ses);
spin_unlock(&clp->cl_lock);
- nfsd4_register_conn(new);
+ ret = nfsd4_register_conn(new);
+ if (ret)
+ /* oops; xprt is already down: */
+ nfsd4_conn_lost(&new->cn_xpt_user);
return;
}
@@ -2310,22 +2318,6 @@ void nfsd_release_deleg_cb(struct file_lock *fl)
}
/*
- * Set the delegation file_lock back pointer.
- *
- * Called from setlease() with lock_kernel() held.
- */
-static
-void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
-{
- struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
-
- dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
- if (!dp)
- return;
- dp->dl_flock = new;
-}
-
-/*
* Called from setlease() with lock_kernel() held
*/
static
@@ -2355,7 +2347,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
static const struct lock_manager_operations nfsd_lease_mng_ops = {
.fl_break = nfsd_break_deleg_cb,
.fl_release_private = nfsd_release_deleg_cb,
- .fl_copy_lock = nfsd_copy_lock_deleg_cb,
.fl_mylease = nfsd_same_client_deleg_cb,
.fl_change = nfsd_change_deleg_cb,
};
@@ -2614,7 +2605,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
struct nfs4_delegation *dp;
struct nfs4_stateowner *sop = stp->st_stateowner;
int cb_up = atomic_read(&sop->so_client->cl_cb_set);
- struct file_lock fl, *flp = &fl;
+ struct file_lock *fl;
int status, flag = 0;
flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2648,21 +2639,28 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
flag = NFS4_OPEN_DELEGATE_NONE;
goto out;
}
- locks_init_lock(&fl);
- fl.fl_lmops = &nfsd_lease_mng_ops;
- fl.fl_flags = FL_LEASE;
- fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
- fl.fl_end = OFFSET_MAX;
- fl.fl_owner = (fl_owner_t)dp;
- fl.fl_file = find_readable_file(stp->st_file);
- BUG_ON(!fl.fl_file);
- fl.fl_pid = current->tgid;
+ status = -ENOMEM;
+ fl = locks_alloc_lock();
+ if (!fl)
+ goto out;
+ locks_init_lock(fl);
+ fl->fl_lmops = &nfsd_lease_mng_ops;
+ fl->fl_flags = FL_LEASE;
+ fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+ fl->fl_end = OFFSET_MAX;
+ fl->fl_owner = (fl_owner_t)dp;
+ fl->fl_file = find_readable_file(stp->st_file);
+ BUG_ON(!fl->fl_file);
+ fl->fl_pid = current->tgid;
+ dp->dl_flock = fl;
/* vfs_setlease checks to see if delegation should be handed out.
* the lock_manager callbacks fl_mylease and fl_change are used
*/
- if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) {
+ if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
dprintk("NFSD: setlease failed [%d], no delegation\n", status);
+ dp->dl_flock = NULL;
+ locks_free_lock(fl);
unhash_delegation(dp);
flag = NFS4_OPEN_DELEGATE_NONE;
goto out;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d6dc3f61f8ba..4514ebbee4d6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1405,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
return simple_fill_super(sb, 0x6e667364, nfsd_files);
}
-static int nfsd_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *nfsd_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt);
+ return mount_single(fs_type, flags, data, nfsd_fill_super);
}
static struct file_system_type nfsd_fs_type = {
.owner = THIS_MODULE,
.name = "nfsd",
- .get_sb = nfsd_get_sb,
+ .mount = nfsd_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 35ae03c0db86..f804d41ec9d3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1141,9 +1141,9 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
return (void *)s->s_bdev == data;
}
-static int
-nilfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *
+nilfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
struct nilfs_super_data sd;
struct super_block *s;
@@ -1156,7 +1156,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
if (IS_ERR(sd.bdev))
- return PTR_ERR(sd.bdev);
+ return ERR_CAST(sd.bdev);
sd.cno = 0;
sd.flags = flags;
@@ -1235,9 +1235,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
if (!s_new)
close_bdev_exclusive(sd.bdev, mode);
- mnt->mnt_sb = s;
- mnt->mnt_root = root_dentry;
- return 0;
+ return root_dentry;
failed_super:
deactivate_locked_super(s);
@@ -1245,13 +1243,13 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
failed:
if (!s_new)
close_bdev_exclusive(sd.bdev, mode);
- return err;
+ return ERR_PTR(err);
}
struct file_system_type nilfs_fs_type = {
.owner = THIS_MODULE,
.name = "nilfs2",
- .get_sb = nilfs_get_sb,
+ .mount = nilfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index b388443c3a09..22c629eedd82 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,4 +3,4 @@ config FSNOTIFY
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"
-#source "fs/notify/fanotify/Kconfig"
+source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 85366c78cc37..b04f88eed09e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -131,6 +131,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+ BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -160,20 +161,21 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
__u32 event_mask, void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
+ struct path *path = data;
pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
"mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
inode_mark, vfsmnt_mark, event_mask, data, data_type);
- /* sorry, fanotify only gives a damn about files and dirs */
- if (!S_ISREG(to_tell->i_mode) &&
- !S_ISDIR(to_tell->i_mode))
- return false;
-
/* if we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_PATH)
return false;
+ /* sorry, fanotify only gives a damn about files and dirs */
+ if (!S_ISREG(path->dentry->d_inode->i_mode) &&
+ !S_ISDIR(path->dentry->d_inode->i_mode))
+ return false;
+
if (inode_mark && vfsmnt_mark) {
marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
@@ -194,16 +196,29 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
BUG();
}
+ if (S_ISDIR(path->dentry->d_inode->i_mode) &&
+ (marks_ignored_mask & FS_ISDIR))
+ return false;
+
if (event_mask & marks_mask & ~marks_ignored_mask)
return true;
return false;
}
+static void fanotify_free_group_priv(struct fsnotify_group *group)
+{
+ struct user_struct *user;
+
+ user = group->fanotify_data.user;
+ atomic_dec(&user->fanotify_listeners);
+ free_uid(user);
+}
+
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.should_send_event = fanotify_should_send_event,
- .free_group_priv = NULL,
+ .free_group_priv = fanotify_free_group_priv,
.free_event_priv = NULL,
.freeing_mark = NULL,
};
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bbcb98e7fcc6..063224812b7e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,10 @@
#include <asm/ioctls.h>
+#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
+#define FANOTIFY_DEFAULT_MAX_MARKS 8192
+#define FANOTIFY_DEFAULT_MAX_LISTENERS 128
+
extern const struct fsnotify_ops fanotify_fsnotify_ops;
static struct kmem_cache *fanotify_mark_cache __read_mostly;
@@ -326,7 +330,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
break;
- ret = -EINTR;
+ ret = -ERESTARTSYS;
if (signal_pending(current))
break;
@@ -372,11 +376,10 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
static int fanotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
- struct fanotify_response_event *re, *lre;
-
- pr_debug("%s: file=%p group=%p\n", __func__, file, group);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+ struct fanotify_response_event *re, *lre;
+
mutex_lock(&group->fanotify_data.access_mutex);
group->fanotify_data.bypass_perm = true;
@@ -554,18 +557,24 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
__u32 mask,
unsigned int flags)
{
- __u32 oldmask;
+ __u32 oldmask = -1;
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
oldmask = fsn_mark->mask;
fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
} else {
- oldmask = fsn_mark->ignored_mask;
- fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask));
+ __u32 tmask = fsn_mark->ignored_mask | mask;
+ fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
}
+
+ if (!(flags & FAN_MARK_ONDIR)) {
+ __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
+ fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
+ }
+
spin_unlock(&fsn_mark->lock);
return mask & ~oldmask;
@@ -582,6 +591,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
if (!fsn_mark) {
int ret;
+ if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
+ return -ENOSPC;
+
fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!fsn_mark)
return -ENOMEM;
@@ -610,10 +622,23 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
+ /*
+ * If some other task has this inode open for write we should not add
+ * an ignored mark, unless that ignored mark is supposed to survive
+ * modification changes anyway.
+ */
+ if ((flags & FAN_MARK_IGNORED_MASK) &&
+ !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ (atomic_read(&inode->i_writecount) > 0))
+ return 0;
+
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark) {
int ret;
+ if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
+ return -ENOSPC;
+
fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
if (!fsn_mark)
return -ENOMEM;
@@ -637,6 +662,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
struct fsnotify_group *group;
int f_flags, fd;
+ struct user_struct *user;
pr_debug("%s: flags=%d event_f_flags=%d\n",
__func__, flags, event_f_flags);
@@ -647,6 +673,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if (flags & ~FAN_ALL_INIT_FLAGS)
return -EINVAL;
+ user = get_current_user();
+ if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
+ free_uid(user);
+ return -EMFILE;
+ }
+
f_flags = O_RDWR | FMODE_NONOTIFY;
if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC;
@@ -658,12 +690,47 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if (IS_ERR(group))
return PTR_ERR(group);
+ group->fanotify_data.user = user;
+ atomic_inc(&user->fanotify_listeners);
+
group->fanotify_data.f_flags = event_f_flags;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
mutex_init(&group->fanotify_data.access_mutex);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
#endif
+ switch (flags & FAN_ALL_CLASS_BITS) {
+ case FAN_CLASS_NOTIF:
+ group->priority = FS_PRIO_0;
+ break;
+ case FAN_CLASS_CONTENT:
+ group->priority = FS_PRIO_1;
+ break;
+ case FAN_CLASS_PRE_CONTENT:
+ group->priority = FS_PRIO_2;
+ break;
+ default:
+ fd = -EINVAL;
+ goto out_put_group;
+ }
+
+ if (flags & FAN_UNLIMITED_QUEUE) {
+ fd = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto out_put_group;
+ group->max_events = UINT_MAX;
+ } else {
+ group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
+ }
+
+ if (flags & FAN_UNLIMITED_MARKS) {
+ fd = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto out_put_group;
+ group->fanotify_data.max_marks = UINT_MAX;
+ } else {
+ group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
+ }
fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
if (fd < 0)
@@ -704,6 +771,12 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
default:
return -EINVAL;
}
+
+ if (mask & FAN_ONDIR) {
+ flags |= FAN_MARK_ONDIR;
+ mask &= ~FAN_ONDIR;
+ }
+
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
#else
@@ -719,6 +792,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
ret = -EINVAL;
if (unlikely(filp->f_op != &fanotify_fops))
goto fput_and_out;
+ group = filp->private_data;
+
+ /*
+ * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
+ * allowed to set permissions events.
+ */
+ ret = -EINVAL;
+ if (mask & FAN_ALL_PERM_EVENTS &&
+ group->priority == FS_PRIO_0)
+ goto fput_and_out;
ret = fanotify_find_path(dfd, pathname, &path, flags);
if (ret)
@@ -729,7 +812,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
inode = path.dentry->d_inode;
else
mnt = path.mnt;
- group = filp->private_data;
/* create/update an inode mark */
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4498a208df94..20dc218707ca 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -84,16 +84,17 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
}
/* Notify this dentry's parent about a child's events. */
-void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
{
struct dentry *parent;
struct inode *p_inode;
+ int ret = 0;
if (!dentry)
dentry = path->dentry;
if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
- return;
+ return 0;
parent = dget_parent(dentry);
p_inode = parent->d_inode;
@@ -106,14 +107,16 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
mask |= FS_EVENT_ON_CHILD;
if (path)
- fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
- dentry->d_name.name, 0);
+ ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
+ dentry->d_name.name, 0);
else
- fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
- dentry->d_name.name, 0);
+ ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+ dentry->d_name.name, 0);
}
dput(parent);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
@@ -252,20 +255,23 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
if (inode_group > vfsmount_group) {
/* handle inode */
- send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
- data_is, cookie, file_name, &event);
+ ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
+ data_is, cookie, file_name, &event);
/* we didn't use the vfsmount_mark */
vfsmount_group = NULL;
} else if (vfsmount_group > inode_group) {
- send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
- data_is, cookie, file_name, &event);
+ ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
+ data_is, cookie, file_name, &event);
inode_group = NULL;
} else {
- send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
- mask, data, data_is, cookie, file_name,
- &event);
+ ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
+ mask, data, data_is, cookie, file_name,
+ &event);
}
+ if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
+ goto out;
+
if (inode_group)
inode_node = srcu_dereference(inode_node->next,
&fsnotify_mark_srcu);
@@ -273,7 +279,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
vfsmount_node = srcu_dereference(vfsmount_node->next,
&fsnotify_mark_srcu);
}
-
+ ret = 0;
+out:
srcu_read_unlock(&fsnotify_mark_srcu, idx);
/*
* fsnotify_create_event() took a reference so the event can't be cleaned
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 21ed10660b80..4c29fcf557d1 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -177,7 +177,8 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
* Attach an initialized mark to a given inode.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group and for which inodes. These
- * marks are ordered according to the group's location in memory.
+ * marks are ordered according to priority, highest number first, and then by
+ * the group's location in memory.
*/
int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
@@ -211,7 +212,11 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
goto out;
}
- if (mark->group < lmark->group)
+ if (mark->group->priority < lmark->group->priority)
+ continue;
+
+ if ((mark->group->priority == lmark->group->priority) &&
+ (mark->group < lmark->group))
continue;
hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 24edc1185d53..444c305a468c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -862,7 +862,7 @@ static int __init inotify_user_setup(void)
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
- BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+ BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 56772b578fbd..85eebff6d0d7 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -169,7 +169,11 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
goto out;
}
- if (mark->group < lmark->group)
+ if (mark->group->priority < lmark->group->priority)
+ continue;
+
+ if ((mark->group->priority == lmark->group->priority) &&
+ (mark->group < lmark->group))
continue;
hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index d3fbe5730bfc..a30ecacc01f2 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3059,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache;
/* Driver wide mutex. */
DEFINE_MUTEX(ntfs_lock);
-static int ntfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ntfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
}
static struct file_system_type ntfs_fs_type = {
.owner = THIS_MODULE,
.name = "ntfs",
- .get_sb = ntfs_get_sb,
+ .mount = ntfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 75e115f1bd73..b2df490a19ed 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -643,16 +643,16 @@ static const struct inode_operations dlmfs_file_inode_operations = {
.setattr = dlmfs_file_setattr,
};
-static int dlmfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
}
static struct file_system_type dlmfs_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2_dlmfs",
- .get_sb = dlmfs_get_sb,
+ .mount = dlmfs_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 56f0cb395820..f02c0ef31578 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1236,14 +1236,12 @@ read_super_error:
return status;
}
-static int ocfs2_get_sb(struct file_system_type *fs_type,
+static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
int flags,
const char *dev_name,
- void *data,
- struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
}
static void ocfs2_kill_sb(struct super_block *sb)
@@ -1267,8 +1265,7 @@ out:
static struct file_system_type ocfs2_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2",
- .get_sb = ocfs2_get_sb, /* is this called when we mount
- * the fs? */
+ .mount = ocfs2_mount,
.kill_sb = ocfs2_kill_sb,
.fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 14a22863291a..e043c4cb9a97 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -557,17 +557,16 @@ end:
return ret;
}
-static int omfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *data, struct vfsmount *m)
+static struct dentry *omfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m);
+ return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super);
}
static struct file_system_type omfs_fs_type = {
.owner = THIS_MODULE,
.name = "omfs",
- .get_sb = omfs_get_sb,
+ .mount = omfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/open.c b/fs/open.c
index d74e1983e8dc..4197b9ed023d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -786,11 +786,11 @@ struct file *nameidata_to_filp(struct nameidata *nd)
/* Pick up the filp from the open intent */
filp = nd->intent.open.file;
/* Has the filesystem initialised the file for us? */
- if (filp->f_path.dentry == NULL)
+ if (filp->f_path.dentry == NULL) {
+ path_get(&nd->path);
filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
NULL, cred);
- else
- path_put(&nd->path);
+ }
return filp;
}
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ffcd04f0012c..911e61f348fc 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -415,16 +415,16 @@ out_no_root:
return ret;
}
-static int openprom_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *openprom_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt);
+ return mount_single(fs_type, flags, data, openprom_fill_super);
}
static struct file_system_type openprom_fs_type = {
.owner = THIS_MODULE,
.name = "openpromfs",
- .get_sb = openprom_get_sb,
+ .mount = openprom_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/pipe.c b/fs/pipe.c
index d2d7566ce68e..a8012a955720 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1247,16 +1247,15 @@ out:
* any operations on the root directory. However, we need a non-trivial
* d_name - pipe: will go nicely and kill the special-casing in procfs.
*/
-static int pipefs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *pipefs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
+ return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}
static struct file_system_type pipe_fs_type = {
.name = "pipefs",
- .get_sb = pipefs_get_sb,
+ .mount = pipefs_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9b094c1c8465..f3d02ca461ec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
{
struct mm_struct *mm;
- if (mutex_lock_killable(&task->cred_guard_mutex))
+ if (mutex_lock_killable(&task->signal->cred_guard_mutex))
return NULL;
mm = get_task_mm(task);
@@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
mmput(mm);
mm = NULL;
}
- mutex_unlock(&task->cred_guard_mutex);
+ mutex_unlock(&task->signal->cred_guard_mutex);
return mm;
}
@@ -2354,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
goto out_free;
/* Guard against adverse ptrace interaction */
- length = mutex_lock_interruptible(&task->cred_guard_mutex);
+ length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
if (length < 0)
goto out_free;
length = security_setprocattr(task,
(char*)file->f_path.dentry->d_name.name,
(void*)page, count);
- mutex_unlock(&task->cred_guard_mutex);
+ mutex_unlock(&task->signal->cred_guard_mutex);
out_free:
free_page((unsigned long) page);
out:
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 93d99b316325..ef9fa8e24ad6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -35,8 +35,8 @@ static int proc_set_super(struct super_block *sb, void *data)
return set_anon_super(sb, NULL);
}
-static int proc_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *proc_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
int err;
struct super_block *sb;
@@ -61,14 +61,14 @@ static int proc_get_sb(struct file_system_type *fs_type,
sb = sget(fs_type, proc_test_super, proc_set_super, ns);
if (IS_ERR(sb))
- return PTR_ERR(sb);
+ return ERR_CAST(sb);
if (!sb->s_root) {
sb->s_flags = flags;
err = proc_fill_super(sb);
if (err) {
deactivate_locked_super(sb);
- return err;
+ return ERR_PTR(err);
}
ei = PROC_I(sb->s_root->d_inode);
@@ -79,11 +79,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
}
sb->s_flags |= MS_ACTIVE;
- ns->proc_mnt = mnt;
}
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
}
static void proc_kill_sb(struct super_block *sb)
@@ -97,7 +95,7 @@ static void proc_kill_sb(struct super_block *sb)
static struct file_system_type proc_fs_type = {
.name = "proc",
- .get_sb = proc_get_sb,
+ .mount = proc_mount,
.kill_sb = proc_kill_sb,
};
@@ -115,6 +113,7 @@ void __init proc_root_init(void)
return;
}
+ init_pid_ns.proc_mnt = proc_mnt;
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
@@ -213,6 +212,7 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
if (IS_ERR(mnt))
return PTR_ERR(mnt);
+ ns->proc_mnt = mnt;
return 0;
}
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 1807c2419f17..37994737c983 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -10,13 +10,13 @@ static int show_softirqs(struct seq_file *p, void *v)
{
int i, j;
- seq_printf(p, " ");
+ seq_printf(p, " ");
for_each_possible_cpu(i)
seq_printf(p, "CPU%-8d", i);
seq_printf(p, "\n");
for (i = 0; i < NR_SOFTIRQS; i++) {
- seq_printf(p, "%8s:", softirq_to_name[i]);
+ seq_printf(p, "%12s:", softirq_to_name[i]);
for_each_possible_cpu(j)
seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
seq_printf(p, "\n");
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf31b03fc275..e15a19c93bae 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v)
u64 sum_softirq = 0;
unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
struct timespec boottime;
- unsigned int per_irq_sum;
user = nice = system = idle = iowait =
irq = softirq = steal = cputime64_zero;
@@ -52,9 +51,7 @@ static int show_stat(struct seq_file *p, void *v)
guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
guest_nice = cputime64_add(guest_nice,
kstat_cpu(i).cpustat.guest_nice);
- for_each_irq_nr(j) {
- sum += kstat_irqs_cpu(j, i);
- }
+ sum += kstat_cpu_irqs_sum(i);
sum += arch_irq_stat_cpu(i);
for (j = 0; j < NR_SOFTIRQS; j++) {
@@ -110,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v)
seq_printf(p, "intr %llu", (unsigned long long)sum);
/* sum again ? it could be updated? */
- for_each_irq_nr(j) {
- per_irq_sum = 0;
- for_each_possible_cpu(i)
- per_irq_sum += kstat_irqs_cpu(j, i);
-
- seq_printf(p, " %u", per_irq_sum);
- }
+ for_each_irq_nr(j)
+ seq_printf(p, " %u", kstat_irqs(j));
seq_printf(p,
"\nctxt %llu\n"
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 871e25ed0069..da6b01d70f01 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -327,6 +327,7 @@ struct mem_size_stats {
unsigned long private_clean;
unsigned long private_dirty;
unsigned long referenced;
+ unsigned long anonymous;
unsigned long swap;
u64 pss;
};
@@ -357,6 +358,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (!page)
continue;
+ if (PageAnon(page))
+ mss->anonymous += PAGE_SIZE;
+
mss->resident += PAGE_SIZE;
/* Accumulate the size in pages that have been accessed. */
if (pte_young(ptent) || PageReferenced(page))
@@ -410,6 +414,7 @@ static int show_smap(struct seq_file *m, void *v)
"Private_Clean: %8lu kB\n"
"Private_Dirty: %8lu kB\n"
"Referenced: %8lu kB\n"
+ "Anonymous: %8lu kB\n"
"Swap: %8lu kB\n"
"KernelPageSize: %8lu kB\n"
"MMUPageSize: %8lu kB\n",
@@ -421,6 +426,7 @@ static int show_smap(struct seq_file *m, void *v)
mss.private_clean >> 10,
mss.private_dirty >> 10,
mss.referenced >> 10,
+ mss.anonymous >> 10,
mss.swap >> 10,
vma_kernel_pagesize(vma) >> 10,
vma_mmu_pagesize(vma) >> 10);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 01bad30026fc..fcada42f1aa3 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -454,17 +454,16 @@ static void destroy_inodecache(void)
kmem_cache_destroy(qnx4_inode_cachep);
}
-static int qnx4_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *qnx4_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
}
static struct file_system_type qnx4_fs_type = {
.owner = THIS_MODULE,
.name = "qnx4",
- .get_sb = qnx4_get_sb,
+ .mount = qnx4_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 3e21b1e2ad3a..880fd9884366 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -4,6 +4,7 @@
config QUOTA
bool "Quota support"
+ select QUOTACTL
help
If you say Y here, you will be able to set per user limits for disk
usage (also called disk quotas). Currently, it works for the
@@ -65,8 +66,7 @@ config QFMT_V2
config QUOTACTL
bool
- depends on XFS_QUOTA || QUOTA
- default y
+ default n
config QUOTACTL_COMPAT
bool
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index aad1316a977f..0fed41e6efcd 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1386,6 +1386,9 @@ static void __dquot_initialize(struct inode *inode, int type)
/* Avoid races with quotaoff() */
if (!sb_has_quota_active(sb, cnt))
continue;
+ /* We could race with quotaon or dqget() could have failed */
+ if (!got[cnt])
+ continue;
if (!inode->i_dquot[cnt]) {
inode->i_dquot[cnt] = got[cnt];
got[cnt] = NULL;
@@ -1736,6 +1739,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
qsize_t rsv_space = 0;
struct dquot *transfer_from[MAXQUOTAS] = {};
int cnt, ret = 0;
+ char is_valid[MAXQUOTAS] = {};
char warntype_to[MAXQUOTAS];
char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
@@ -1757,8 +1761,15 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
space = cur_space + rsv_space;
/* Build the transfer_from list and check the limits */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ /*
+ * Skip changes for same uid or gid or for turned off quota-type.
+ */
if (!transfer_to[cnt])
continue;
+ /* Avoid races with quotaoff() */
+ if (!sb_has_quota_active(inode->i_sb, cnt))
+ continue;
+ is_valid[cnt] = 1;
transfer_from[cnt] = inode->i_dquot[cnt];
ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
if (ret)
@@ -1772,12 +1783,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
* Finally perform the needed transfer from transfer_from to transfer_to
*/
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- /*
- * Skip changes for same uid or gid or for turned off quota-type.
- */
- if (!transfer_to[cnt])
+ if (!is_valid[cnt])
continue;
-
/* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) {
warntype_from_inodes[cnt] =
@@ -1801,18 +1808,19 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
mark_all_dquot_dirty(transfer_from);
mark_all_dquot_dirty(transfer_to);
- /* Pass back references to put */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- transfer_to[cnt] = transfer_from[cnt];
-warn:
flush_warnings(transfer_to, warntype_to);
flush_warnings(transfer_from, warntype_from_inodes);
flush_warnings(transfer_from, warntype_from_space);
- return ret;
+ /* Pass back references to put */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (is_valid[cnt])
+ transfer_to[cnt] = transfer_from[cnt];
+ return 0;
over_quota:
spin_unlock(&dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
- goto warn;
+ flush_warnings(transfer_to, warntype_to);
+ return ret;
}
EXPORT_SYMBOL(__dquot_transfer);
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 67fadb1ad2c1..eacb166fb259 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -255,17 +255,16 @@ fail:
return err;
}
-int ramfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+struct dentry *ramfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt);
+ return mount_nodev(fs_type, flags, data, ramfs_fill_super);
}
-static int rootfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *rootfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,
- mnt);
+ return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
}
static void ramfs_kill_sb(struct super_block *sb)
@@ -276,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb)
static struct file_system_type ramfs_fs_type = {
.name = "ramfs",
- .get_sb = ramfs_get_sb,
+ .mount = ramfs_mount,
.kill_sb = ramfs_kill_sb,
};
static struct file_system_type rootfs_fs_type = {
.name = "rootfs",
- .get_sb = rootfs_get_sb,
+ .mount = rootfs_mount,
.kill_sb = kill_litter_super,
};
diff --git a/fs/read_write.c b/fs/read_write.c
index 9cd9d148105d..431a0ed610c8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -243,8 +243,6 @@ bad:
* them to something that fits in "int" so that others
* won't have to do range checks all the time.
*/
-#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
-
int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
{
struct inode *inode;
@@ -584,65 +582,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
struct iovec **ret_pointer)
- {
+{
unsigned long seg;
- ssize_t ret;
+ ssize_t ret;
struct iovec *iov = fast_pointer;
- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
+ /*
+ * SuS says "The readv() function *may* fail if the iovcnt argument
+ * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
+ * traditionally returned zero for zero segments, so...
+ */
if (nr_segs == 0) {
ret = 0;
- goto out;
+ goto out;
}
- /*
- * First get the "struct iovec" from user memory and
- * verify all the pointers
- */
+ /*
+ * First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
if (nr_segs > UIO_MAXIOV) {
ret = -EINVAL;
- goto out;
+ goto out;
}
if (nr_segs > fast_segs) {
- iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+ iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
if (iov == NULL) {
ret = -ENOMEM;
- goto out;
+ goto out;
}
- }
+ }
if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
ret = -EFAULT;
- goto out;
+ goto out;
}
- /*
+ /*
* According to the Single Unix Specification we should return EINVAL
* if an element length is < 0 when cast to ssize_t or if the
* total length would overflow the ssize_t return value of the
* system call.
- */
+ *
+ * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+ * overflow case.
+ */
ret = 0;
- for (seg = 0; seg < nr_segs; seg++) {
- void __user *buf = iov[seg].iov_base;
- ssize_t len = (ssize_t)iov[seg].iov_len;
+ for (seg = 0; seg < nr_segs; seg++) {
+ void __user *buf = iov[seg].iov_base;
+ ssize_t len = (ssize_t)iov[seg].iov_len;
/* see if we we're about to use an invalid len or if
* it's about to overflow ssize_t */
- if (len < 0 || (ret + len < ret)) {
+ if (len < 0) {
ret = -EINVAL;
- goto out;
+ goto out;
}
if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
ret = -EFAULT;
- goto out;
+ goto out;
+ }
+ if (len > MAX_RW_COUNT - ret) {
+ len = MAX_RW_COUNT - ret;
+ iov[seg].iov_len = len;
}
-
ret += len;
- }
+ }
out:
*ret_pointer = iov;
return ret;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e15ff612002d..3bf7a6457f4d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2213,12 +2213,11 @@ out:
#endif
-static int get_super_block(struct file_system_type *fs_type,
+static struct dentry *get_super_block(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
}
static int __init init_reiserfs_fs(void)
@@ -2253,7 +2252,7 @@ static void __exit exit_reiserfs_fs(void)
struct file_system_type reiserfs_fs_type = {
.owner = THIS_MODULE,
.name = "reiserfs",
- .get_sb = get_super_block,
+ .mount = get_super_block,
.kill_sb = reiserfs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268580535c92..6647f90e55cd 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -552,20 +552,19 @@ error_rsb:
/*
* get a superblock for mounting
*/
-static int romfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *romfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
+ void *data)
{
- int ret = -EINVAL;
+ struct dentry *ret = ERR_PTR(-EINVAL);
#ifdef CONFIG_ROMFS_ON_MTD
- ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super,
- mnt);
+ ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super);
#endif
#ifdef CONFIG_ROMFS_ON_BLOCK
- if (ret == -EINVAL)
- ret = get_sb_bdev(fs_type, flags, dev_name, data,
- romfs_fill_super, mnt);
+ if (ret == ERR_PTR(-EINVAL))
+ ret = mount_bdev(fs_type, flags, dev_name, data,
+ romfs_fill_super);
#endif
return ret;
}
@@ -592,7 +591,7 @@ static void romfs_kill_sb(struct super_block *sb)
static struct file_system_type romfs_fs_type = {
.owner = THIS_MODULE,
.name = "romfs",
- .get_sb = romfs_get_sb,
+ .mount = romfs_mount,
.kill_sb = romfs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/select.c b/fs/select.c
index 500a669f7790..b7b10aa30861 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv)
return slack;
}
-static long estimate_accuracy(struct timespec *tv)
+long select_estimate_accuracy(struct timespec *tv)
{
unsigned long ret;
struct timespec now;
@@ -417,7 +417,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
}
if (end_time && !timed_out)
- slack = estimate_accuracy(end_time);
+ slack = select_estimate_accuracy(end_time);
retval = 0;
for (;;) {
@@ -769,7 +769,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
}
if (end_time && !timed_out)
- slack = estimate_accuracy(end_time);
+ slack = select_estimate_accuracy(end_time);
for (;;) {
struct poll_list *walk;
diff --git a/fs/smbfs/Kconfig b/fs/smbfs/Kconfig
deleted file mode 100644
index 2bc24a8c4039..000000000000
--- a/fs/smbfs/Kconfig
+++ /dev/null
@@ -1,56 +0,0 @@
-config SMB_FS
- tristate "SMB file system support (OBSOLETE, please use CIFS)"
- depends on BKL # probably unfixable
- depends on INET
- select NLS
- help
- SMB (Server Message Block) is the protocol Windows for Workgroups
- (WfW), Windows 95/98, Windows NT and OS/2 Lan Manager use to share
- files and printers over local networks. Saying Y here allows you to
- mount their file systems (often called "shares" in this context) and
- access them just like any other Unix directory. Currently, this
- works only if the Windows machines use TCP/IP as the underlying
- transport protocol, and not NetBEUI. For details, read
- <file:Documentation/filesystems/smbfs.txt> and the SMB-HOWTO,
- available from <http://www.tldp.org/docs.html#howto>.
-
- Note: if you just want your box to act as an SMB *server* and make
- files and printing services available to Windows clients (which need
- to have a TCP/IP stack), you don't need to say Y here; you can use
- the program SAMBA (available from <ftp://ftp.samba.org/pub/samba/>)
- for that.
-
- General information about how to connect Linux, Windows machines and
- Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
-
- To compile the SMB support as a module, choose M here:
- the module will be called smbfs. Most people say N, however.
-
-config SMB_NLS_DEFAULT
- bool "Use a default NLS"
- depends on SMB_FS
- help
- Enabling this will make smbfs use nls translations by default. You
- need to specify the local charset (CONFIG_NLS_DEFAULT) in the nls
- settings and you need to give the default nls for the SMB server as
- CONFIG_SMB_NLS_REMOTE.
-
- The nls settings can be changed at mount time, if your smbmount
- supports that, using the codepage and iocharset parameters.
-
- smbmount from samba 2.2.0 or later supports this.
-
-config SMB_NLS_REMOTE
- string "Default Remote NLS Option"
- depends on SMB_NLS_DEFAULT
- default "cp437"
- help
- This setting allows you to specify a default value for which
- codepage the server uses. If this field is left blank no
- translations will be done by default. The local codepage/charset
- default to CONFIG_NLS_DEFAULT.
-
- The nls settings can be changed at mount time, if your smbmount
- supports that, using the codepage and iocharset parameters.
-
- smbmount from samba 2.2.0 or later supports this.
diff --git a/fs/smbfs/Makefile b/fs/smbfs/Makefile
deleted file mode 100644
index 4faf8c4722c3..000000000000
--- a/fs/smbfs/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# Makefile for the linux smb-filesystem routines.
-#
-
-obj-$(CONFIG_SMB_FS) += smbfs.o
-
-smbfs-objs := proc.o dir.o cache.o sock.o inode.o file.o ioctl.o getopt.o \
- symlink.o smbiod.o request.o
-
-# If you want debugging output, you may add these flags to the EXTRA_CFLAGS
-# SMBFS_PARANOIA should normally be enabled.
-
-EXTRA_CFLAGS += -DSMBFS_PARANOIA
-#EXTRA_CFLAGS += -DSMBFS_DEBUG
-#EXTRA_CFLAGS += -DSMBFS_DEBUG_VERBOSE
-#EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP
-#EXTRA_CFLAGS += -Werror
-
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
deleted file mode 100644
index 8c177eb7e344..000000000000
--- a/fs/smbfs/cache.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * cache.c
- *
- * Copyright (C) 1997 by Bill Hawes
- *
- * Routines to support directory cacheing using the page cache.
- * This cache code is almost directly taken from ncpfs.
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smb_fs.h>
-#include <linux/pagemap.h>
-#include <linux/net.h>
-
-#include <asm/page.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-/*
- * Force the next attempt to use the cache to be a timeout.
- * If we can't find the page that's fine, it will cause a refresh.
- */
-void
-smb_invalid_dir_cache(struct inode * dir)
-{
- struct smb_sb_info *server = server_from_inode(dir);
- union smb_dir_cache *cache = NULL;
- struct page *page = NULL;
-
- page = grab_cache_page(&dir->i_data, 0);
- if (!page)
- goto out;
-
- if (!PageUptodate(page))
- goto out_unlock;
-
- cache = kmap(page);
- cache->head.time = jiffies - SMB_MAX_AGE(server);
-
- kunmap(page);
- SetPageUptodate(page);
-out_unlock:
- unlock_page(page);
- page_cache_release(page);
-out:
- return;
-}
-
-/*
- * Mark all dentries for 'parent' as invalid, forcing them to be re-read
- */
-void
-smb_invalidate_dircache_entries(struct dentry *parent)
-{
- struct smb_sb_info *server = server_from_dentry(parent);
- struct list_head *next;
- struct dentry *dentry;
-
- spin_lock(&dcache_lock);
- next = parent->d_subdirs.next;
- while (next != &parent->d_subdirs) {
- dentry = list_entry(next, struct dentry, d_u.d_child);
- dentry->d_fsdata = NULL;
- smb_age_dentry(server, dentry);
- next = next->next;
- }
- spin_unlock(&dcache_lock);
-}
-
-/*
- * dget, but require that fpos and parent matches what the dentry contains.
- * dentry is not known to be a valid pointer at entry.
- */
-struct dentry *
-smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
-{
- struct dentry *dent = dentry;
- struct list_head *next;
-
- if (d_validate(dent, parent)) {
- if (dent->d_name.len <= SMB_MAXNAMELEN &&
- (unsigned long)dent->d_fsdata == fpos) {
- if (!dent->d_inode) {
- dput(dent);
- dent = NULL;
- }
- return dent;
- }
- dput(dent);
- }
-
- /* If a pointer is invalid, we search the dentry. */
- spin_lock(&dcache_lock);
- next = parent->d_subdirs.next;
- while (next != &parent->d_subdirs) {
- dent = list_entry(next, struct dentry, d_u.d_child);
- if ((unsigned long)dent->d_fsdata == fpos) {
- if (dent->d_inode)
- dget_locked(dent);
- else
- dent = NULL;
- goto out_unlock;
- }
- next = next->next;
- }
- dent = NULL;
-out_unlock:
- spin_unlock(&dcache_lock);
- return dent;
-}
-
-
-/*
- * Create dentry/inode for this file and add it to the dircache.
- */
-int
-smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
- struct smb_cache_control *ctrl, struct qstr *qname,
- struct smb_fattr *entry)
-{
- struct dentry *newdent, *dentry = filp->f_path.dentry;
- struct inode *newino, *inode = dentry->d_inode;
- struct smb_cache_control ctl = *ctrl;
- int valid = 0;
- int hashed = 0;
- ino_t ino = 0;
-
- qname->hash = full_name_hash(qname->name, qname->len);
-
- if (dentry->d_op && dentry->d_op->d_hash)
- if (dentry->d_op->d_hash(dentry, qname) != 0)
- goto end_advance;
-
- newdent = d_lookup(dentry, qname);
-
- if (!newdent) {
- newdent = d_alloc(dentry, qname);
- if (!newdent)
- goto end_advance;
- } else {
- hashed = 1;
- memcpy((char *) newdent->d_name.name, qname->name,
- newdent->d_name.len);
- }
-
- if (!newdent->d_inode) {
- smb_renew_times(newdent);
- entry->f_ino = iunique(inode->i_sb, 2);
- newino = smb_iget(inode->i_sb, entry);
- if (newino) {
- smb_new_dentry(newdent);
- d_instantiate(newdent, newino);
- if (!hashed)
- d_rehash(newdent);
- }
- } else
- smb_set_inode_attr(newdent->d_inode, entry);
-
- if (newdent->d_inode) {
- ino = newdent->d_inode->i_ino;
- newdent->d_fsdata = (void *) ctl.fpos;
- smb_new_dentry(newdent);
- }
-
- if (ctl.idx >= SMB_DIRCACHE_SIZE) {
- if (ctl.page) {
- kunmap(ctl.page);
- SetPageUptodate(ctl.page);
- unlock_page(ctl.page);
- page_cache_release(ctl.page);
- }
- ctl.cache = NULL;
- ctl.idx -= SMB_DIRCACHE_SIZE;
- ctl.ofs += 1;
- ctl.page = grab_cache_page(&inode->i_data, ctl.ofs);
- if (ctl.page)
- ctl.cache = kmap(ctl.page);
- }
- if (ctl.cache) {
- ctl.cache->dentry[ctl.idx] = newdent;
- valid = 1;
- }
- dput(newdent);
-
-end_advance:
- if (!valid)
- ctl.valid = 0;
- if (!ctl.filled && (ctl.fpos == filp->f_pos)) {
- if (!ino)
- ino = find_inode_number(dentry, qname);
- if (!ino)
- ino = iunique(inode->i_sb, 2);
- ctl.filled = filldir(dirent, qname->name, qname->len,
- filp->f_pos, ino, DT_UNKNOWN);
- if (!ctl.filled)
- filp->f_pos += 1;
- }
- ctl.fpos += 1;
- ctl.idx += 1;
- *ctrl = ctl;
- return (ctl.valid || !ctl.filled);
-}
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
deleted file mode 100644
index f678d421e541..000000000000
--- a/fs/smbfs/dir.c
+++ /dev/null
@@ -1,696 +0,0 @@
-/*
- * dir.c
- *
- * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/smp_lock.h>
-#include <linux/ctype.h>
-#include <linux/net.h>
-#include <linux/sched.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb_mount.h>
-#include <linux/smbno.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-static int smb_readdir(struct file *, void *, filldir_t);
-static int smb_dir_open(struct inode *, struct file *);
-
-static struct dentry *smb_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int smb_create(struct inode *, struct dentry *, int, struct nameidata *);
-static int smb_mkdir(struct inode *, struct dentry *, int);
-static int smb_rmdir(struct inode *, struct dentry *);
-static int smb_unlink(struct inode *, struct dentry *);
-static int smb_rename(struct inode *, struct dentry *,
- struct inode *, struct dentry *);
-static int smb_make_node(struct inode *,struct dentry *,int,dev_t);
-static int smb_link(struct dentry *, struct inode *, struct dentry *);
-
-const struct file_operations smb_dir_operations =
-{
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = smb_readdir,
- .unlocked_ioctl = smb_ioctl,
- .open = smb_dir_open,
-};
-
-const struct inode_operations smb_dir_inode_operations =
-{
- .create = smb_create,
- .lookup = smb_lookup,
- .unlink = smb_unlink,
- .mkdir = smb_mkdir,
- .rmdir = smb_rmdir,
- .rename = smb_rename,
- .getattr = smb_getattr,
- .setattr = smb_notify_change,
-};
-
-const struct inode_operations smb_dir_inode_operations_unix =
-{
- .create = smb_create,
- .lookup = smb_lookup,
- .unlink = smb_unlink,
- .mkdir = smb_mkdir,
- .rmdir = smb_rmdir,
- .rename = smb_rename,
- .getattr = smb_getattr,
- .setattr = smb_notify_change,
- .symlink = smb_symlink,
- .mknod = smb_make_node,
- .link = smb_link,
-};
-
-/*
- * Read a directory, using filldir to fill the dirent memory.
- * smb_proc_readdir does the actual reading from the smb server.
- *
- * The cache code is almost directly taken from ncpfs
- */
-static int
-smb_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *dir = dentry->d_inode;
- struct smb_sb_info *server = server_from_dentry(dentry);
- union smb_dir_cache *cache = NULL;
- struct smb_cache_control ctl;
- struct page *page = NULL;
- int result;
-
- ctl.page = NULL;
- ctl.cache = NULL;
-
- VERBOSE("reading %s/%s, f_pos=%d\n",
- DENTRY_PATH(dentry), (int) filp->f_pos);
-
- result = 0;
-
- lock_kernel();
-
- switch ((unsigned int) filp->f_pos) {
- case 0:
- if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos = 1;
- /* fallthrough */
- case 1:
- if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR) < 0)
- goto out;
- filp->f_pos = 2;
- }
-
- /*
- * Make sure our inode is up-to-date.
- */
- result = smb_revalidate_inode(dentry);
- if (result)
- goto out;
-
-
- page = grab_cache_page(&dir->i_data, 0);
- if (!page)
- goto read_really;
-
- ctl.cache = cache = kmap(page);
- ctl.head = cache->head;
-
- if (!PageUptodate(page) || !ctl.head.eof) {
- VERBOSE("%s/%s, page uptodate=%d, eof=%d\n",
- DENTRY_PATH(dentry), PageUptodate(page),ctl.head.eof);
- goto init_cache;
- }
-
- if (filp->f_pos == 2) {
- if (jiffies - ctl.head.time >= SMB_MAX_AGE(server))
- goto init_cache;
-
- /*
- * N.B. ncpfs checks mtime of dentry too here, we don't.
- * 1. common smb servers do not update mtime on dir changes
- * 2. it requires an extra smb request
- * (revalidate has the same timeout as ctl.head.time)
- *
- * Instead smbfs invalidates its own cache on local changes
- * and remote changes are not seen until timeout.
- */
- }
-
- if (filp->f_pos > ctl.head.end)
- goto finished;
-
- ctl.fpos = filp->f_pos + (SMB_DIRCACHE_START - 2);
- ctl.ofs = ctl.fpos / SMB_DIRCACHE_SIZE;
- ctl.idx = ctl.fpos % SMB_DIRCACHE_SIZE;
-
- for (;;) {
- if (ctl.ofs != 0) {
- ctl.page = find_lock_page(&dir->i_data, ctl.ofs);
- if (!ctl.page)
- goto invalid_cache;
- ctl.cache = kmap(ctl.page);
- if (!PageUptodate(ctl.page))
- goto invalid_cache;
- }
- while (ctl.idx < SMB_DIRCACHE_SIZE) {
- struct dentry *dent;
- int res;
-
- dent = smb_dget_fpos(ctl.cache->dentry[ctl.idx],
- dentry, filp->f_pos);
- if (!dent)
- goto invalid_cache;
-
- res = filldir(dirent, dent->d_name.name,
- dent->d_name.len, filp->f_pos,
- dent->d_inode->i_ino, DT_UNKNOWN);
- dput(dent);
- if (res)
- goto finished;
- filp->f_pos += 1;
- ctl.idx += 1;
- if (filp->f_pos > ctl.head.end)
- goto finished;
- }
- if (ctl.page) {
- kunmap(ctl.page);
- SetPageUptodate(ctl.page);
- unlock_page(ctl.page);
- page_cache_release(ctl.page);
- ctl.page = NULL;
- }
- ctl.idx = 0;
- ctl.ofs += 1;
- }
-invalid_cache:
- if (ctl.page) {
- kunmap(ctl.page);
- unlock_page(ctl.page);
- page_cache_release(ctl.page);
- ctl.page = NULL;
- }
- ctl.cache = cache;
-init_cache:
- smb_invalidate_dircache_entries(dentry);
- ctl.head.time = jiffies;
- ctl.head.eof = 0;
- ctl.fpos = 2;
- ctl.ofs = 0;
- ctl.idx = SMB_DIRCACHE_START;
- ctl.filled = 0;
- ctl.valid = 1;
-read_really:
- result = server->ops->readdir(filp, dirent, filldir, &ctl);
- if (result == -ERESTARTSYS && page)
- ClearPageUptodate(page);
- if (ctl.idx == -1)
- goto invalid_cache; /* retry */
- ctl.head.end = ctl.fpos - 1;
- ctl.head.eof = ctl.valid;
-finished:
- if (page) {
- cache->head = ctl.head;
- kunmap(page);
- if (result != -ERESTARTSYS)
- SetPageUptodate(page);
- unlock_page(page);
- page_cache_release(page);
- }
- if (ctl.page) {
- kunmap(ctl.page);
- SetPageUptodate(ctl.page);
- unlock_page(ctl.page);
- page_cache_release(ctl.page);
- }
-out:
- unlock_kernel();
- return result;
-}
-
-static int
-smb_dir_open(struct inode *dir, struct file *file)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct smb_sb_info *server;
- int error = 0;
-
- VERBOSE("(%s/%s)\n", dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name);
-
- /*
- * Directory timestamps in the core protocol aren't updated
- * when a file is added, so we give them a very short TTL.
- */
- lock_kernel();
- server = server_from_dentry(dentry);
- if (server->opt.protocol < SMB_PROTOCOL_LANMAN2) {
- unsigned long age = jiffies - SMB_I(dir)->oldmtime;
- if (age > 2*HZ)
- smb_invalid_dir_cache(dir);
- }
-
- /*
- * Note: in order to allow the smbmount process to open the
- * mount point, we only revalidate if the connection is valid or
- * if the process is trying to access something other than the root.
- */
- if (server->state == CONN_VALID || !IS_ROOT(dentry))
- error = smb_revalidate_inode(dentry);
- unlock_kernel();
- return error;
-}
-
-/*
- * Dentry operations routines
- */
-static int smb_lookup_validate(struct dentry *, struct nameidata *);
-static int smb_hash_dentry(struct dentry *, struct qstr *);
-static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
-static int smb_delete_dentry(struct dentry *);
-
-static const struct dentry_operations smbfs_dentry_operations =
-{
- .d_revalidate = smb_lookup_validate,
- .d_hash = smb_hash_dentry,
- .d_compare = smb_compare_dentry,
- .d_delete = smb_delete_dentry,
-};
-
-static const struct dentry_operations smbfs_dentry_operations_case =
-{
- .d_revalidate = smb_lookup_validate,
- .d_delete = smb_delete_dentry,
-};
-
-
-/*
- * This is the callback when the dcache has a lookup hit.
- */
-static int
-smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- struct inode * inode = dentry->d_inode;
- unsigned long age = jiffies - dentry->d_time;
- int valid;
-
- /*
- * The default validation is based on dentry age:
- * we believe in dentries for a few seconds. (But each
- * successful server lookup renews the timestamp.)
- */
- valid = (age <= SMB_MAX_AGE(server));
-#ifdef SMBFS_DEBUG_VERBOSE
- if (!valid)
- VERBOSE("%s/%s not valid, age=%lu\n",
- DENTRY_PATH(dentry), age);
-#endif
-
- if (inode) {
- lock_kernel();
- if (is_bad_inode(inode)) {
- PARANOIA("%s/%s has dud inode\n", DENTRY_PATH(dentry));
- valid = 0;
- } else if (!valid)
- valid = (smb_revalidate_inode(dentry) == 0);
- unlock_kernel();
- } else {
- /*
- * What should we do for negative dentries?
- */
- }
- return valid;
-}
-
-static int
-smb_hash_dentry(struct dentry *dir, struct qstr *this)
-{
- unsigned long hash;
- int i;
-
- hash = init_name_hash();
- for (i=0; i < this->len ; i++)
- hash = partial_name_hash(tolower(this->name[i]), hash);
- this->hash = end_name_hash(hash);
-
- return 0;
-}
-
-static int
-smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b)
-{
- int i, result = 1;
-
- if (a->len != b->len)
- goto out;
- for (i=0; i < a->len; i++) {
- if (tolower(a->name[i]) != tolower(b->name[i]))
- goto out;
- }
- result = 0;
-out:
- return result;
-}
-
-/*
- * This is the callback from dput() when d_count is going to 0.
- * We use this to unhash dentries with bad inodes.
- */
-static int
-smb_delete_dentry(struct dentry * dentry)
-{
- if (dentry->d_inode) {
- if (is_bad_inode(dentry->d_inode)) {
- PARANOIA("bad inode, unhashing %s/%s\n",
- DENTRY_PATH(dentry));
- return 1;
- }
- } else {
- /* N.B. Unhash negative dentries? */
- }
- return 0;
-}
-
-/*
- * Initialize a new dentry
- */
-void
-smb_new_dentry(struct dentry *dentry)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
-
- if (server->mnt->flags & SMB_MOUNT_CASE)
- dentry->d_op = &smbfs_dentry_operations_case;
- else
- dentry->d_op = &smbfs_dentry_operations;
- dentry->d_time = jiffies;
-}
-
-
-/*
- * Whenever a lookup succeeds, we know the parent directories
- * are all valid, so we want to update the dentry timestamps.
- * N.B. Move this to dcache?
- */
-void
-smb_renew_times(struct dentry * dentry)
-{
- dget(dentry);
- dentry->d_time = jiffies;
-
- while (!IS_ROOT(dentry)) {
- struct dentry *parent = dget_parent(dentry);
- dput(dentry);
- dentry = parent;
-
- dentry->d_time = jiffies;
- }
- dput(dentry);
-}
-
-static struct dentry *
-smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
- struct smb_fattr finfo;
- struct inode *inode;
- int error;
- struct smb_sb_info *server;
-
- error = -ENAMETOOLONG;
- if (dentry->d_name.len > SMB_MAXNAMELEN)
- goto out;
-
- /* Do not allow lookup of names with backslashes in */
- error = -EINVAL;
- if (memchr(dentry->d_name.name, '\\', dentry->d_name.len))
- goto out;
-
- lock_kernel();
- error = smb_proc_getattr(dentry, &finfo);
-#ifdef SMBFS_PARANOIA
- if (error && error != -ENOENT)
- PARANOIA("find %s/%s failed, error=%d\n",
- DENTRY_PATH(dentry), error);
-#endif
-
- inode = NULL;
- if (error == -ENOENT)
- goto add_entry;
- if (!error) {
- error = -EACCES;
- finfo.f_ino = iunique(dentry->d_sb, 2);
- inode = smb_iget(dir->i_sb, &finfo);
- if (inode) {
- add_entry:
- server = server_from_dentry(dentry);
- if (server->mnt->flags & SMB_MOUNT_CASE)
- dentry->d_op = &smbfs_dentry_operations_case;
- else
- dentry->d_op = &smbfs_dentry_operations;
-
- d_add(dentry, inode);
- smb_renew_times(dentry);
- error = 0;
- }
- }
- unlock_kernel();
-out:
- return ERR_PTR(error);
-}
-
-/*
- * This code is common to all routines creating a new inode.
- */
-static int
-smb_instantiate(struct dentry *dentry, __u16 fileid, int have_id)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- struct inode *inode;
- int error;
- struct smb_fattr fattr;
-
- VERBOSE("file %s/%s, fileid=%u\n", DENTRY_PATH(dentry), fileid);
-
- error = smb_proc_getattr(dentry, &fattr);
- if (error)
- goto out_close;
-
- smb_renew_times(dentry);
- fattr.f_ino = iunique(dentry->d_sb, 2);
- inode = smb_iget(dentry->d_sb, &fattr);
- if (!inode)
- goto out_no_inode;
-
- if (have_id) {
- struct smb_inode_info *ei = SMB_I(inode);
- ei->fileid = fileid;
- ei->access = SMB_O_RDWR;
- ei->open = server->generation;
- }
- d_instantiate(dentry, inode);
-out:
- return error;
-
-out_no_inode:
- error = -EACCES;
-out_close:
- if (have_id) {
- PARANOIA("%s/%s failed, error=%d, closing %u\n",
- DENTRY_PATH(dentry), error, fileid);
- smb_close_fileid(dentry, fileid);
- }
- goto out;
-}
-
-/* N.B. How should the mode argument be used? */
-static int
-smb_create(struct inode *dir, struct dentry *dentry, int mode,
- struct nameidata *nd)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- __u16 fileid;
- int error;
- struct iattr attr;
-
- VERBOSE("creating %s/%s, mode=%d\n", DENTRY_PATH(dentry), mode);
-
- lock_kernel();
- smb_invalid_dir_cache(dir);
- error = smb_proc_create(dentry, 0, get_seconds(), &fileid);
- if (!error) {
- if (server->opt.capabilities & SMB_CAP_UNIX) {
- /* Set attributes for new file */
- attr.ia_valid = ATTR_MODE;
- attr.ia_mode = mode;
- error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
- }
- error = smb_instantiate(dentry, fileid, 1);
- } else {
- PARANOIA("%s/%s failed, error=%d\n",
- DENTRY_PATH(dentry), error);
- }
- unlock_kernel();
- return error;
-}
-
-/* N.B. How should the mode argument be used? */
-static int
-smb_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- int error;
- struct iattr attr;
-
- lock_kernel();
- smb_invalid_dir_cache(dir);
- error = smb_proc_mkdir(dentry);
- if (!error) {
- if (server->opt.capabilities & SMB_CAP_UNIX) {
- /* Set attributes for new directory */
- attr.ia_valid = ATTR_MODE;
- attr.ia_mode = mode;
- error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
- }
- error = smb_instantiate(dentry, 0, 0);
- }
- unlock_kernel();
- return error;
-}
-
-static int
-smb_rmdir(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- /*
- * Close the directory if it's open.
- */
- lock_kernel();
- smb_close(inode);
-
- /*
- * Check that nobody else is using the directory..
- */
- error = -EBUSY;
- if (!d_unhashed(dentry))
- goto out;
-
- smb_invalid_dir_cache(dir);
- error = smb_proc_rmdir(dentry);
-
-out:
- unlock_kernel();
- return error;
-}
-
-static int
-smb_unlink(struct inode *dir, struct dentry *dentry)
-{
- int error;
-
- /*
- * Close the file if it's open.
- */
- lock_kernel();
- smb_close(dentry->d_inode);
-
- smb_invalid_dir_cache(dir);
- error = smb_proc_unlink(dentry);
- if (!error)
- smb_renew_times(dentry);
- unlock_kernel();
- return error;
-}
-
-static int
-smb_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-{
- int error;
-
- /*
- * Close any open files, and check whether to delete the
- * target before attempting the rename.
- */
- lock_kernel();
- if (old_dentry->d_inode)
- smb_close(old_dentry->d_inode);
- if (new_dentry->d_inode) {
- smb_close(new_dentry->d_inode);
- error = smb_proc_unlink(new_dentry);
- if (error) {
- VERBOSE("unlink %s/%s, error=%d\n",
- DENTRY_PATH(new_dentry), error);
- goto out;
- }
- /* FIXME */
- d_delete(new_dentry);
- }
-
- smb_invalid_dir_cache(old_dir);
- smb_invalid_dir_cache(new_dir);
- error = smb_proc_mv(old_dentry, new_dentry);
- if (!error) {
- smb_renew_times(old_dentry);
- smb_renew_times(new_dentry);
- }
-out:
- unlock_kernel();
- return error;
-}
-
-/*
- * FIXME: samba servers won't let you create device nodes unless uid/gid
- * matches the connection credentials (and we don't know which those are ...)
- */
-static int
-smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
-{
- int error;
- struct iattr attr;
-
- attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
- attr.ia_mode = mode;
- current_euid_egid(&attr.ia_uid, &attr.ia_gid);
-
- if (!new_valid_dev(dev))
- return -EINVAL;
-
- smb_invalid_dir_cache(dir);
- error = smb_proc_setattr_unix(dentry, &attr, MAJOR(dev), MINOR(dev));
- if (!error) {
- error = smb_instantiate(dentry, 0, 0);
- }
- return error;
-}
-
-/*
- * dentry = existing file
- * new_dentry = new file
- */
-static int
-smb_link(struct dentry *dentry, struct inode *dir, struct dentry *new_dentry)
-{
- int error;
-
- DEBUG1("smb_link old=%s/%s new=%s/%s\n",
- DENTRY_PATH(dentry), DENTRY_PATH(new_dentry));
- smb_invalid_dir_cache(dir);
- error = smb_proc_link(server_from_dentry(dentry), dentry, new_dentry);
- if (!error) {
- smb_renew_times(dentry);
- error = smb_instantiate(new_dentry, 0, 0);
- }
- return error;
-}
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
deleted file mode 100644
index 8e187a0f94bb..000000000000
--- a/fs/smbfs/file.c
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * file.c
- *
- * Copyright (C) 1995, 1996, 1997 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-#include <linux/aio.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include <linux/smbno.h>
-#include <linux/smb_fs.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-static int
-smb_fsync(struct file *file, int datasync)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct smb_sb_info *server = server_from_dentry(dentry);
- int result;
-
- VERBOSE("sync file %s/%s\n", DENTRY_PATH(dentry));
-
- /*
- * The VFS will writepage() all dirty pages for us, but we
- * should send a SMBflush to the server, letting it know that
- * we want things synchronized with actual storage.
- *
- * Note: this function requires all pages to have been written already
- * (should be ok with writepage_sync)
- */
- result = smb_proc_flush(server, SMB_I(dentry->d_inode)->fileid);
- return result;
-}
-
-/*
- * Read a page synchronously.
- */
-static int
-smb_readpage_sync(struct dentry *dentry, struct page *page)
-{
- char *buffer = kmap(page);
- loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
- struct smb_sb_info *server = server_from_dentry(dentry);
- unsigned int rsize = smb_get_rsize(server);
- int count = PAGE_SIZE;
- int result;
-
- VERBOSE("file %s/%s, count=%d@%Ld, rsize=%d\n",
- DENTRY_PATH(dentry), count, offset, rsize);
-
- result = smb_open(dentry, SMB_O_RDONLY);
- if (result < 0)
- goto io_error;
-
- do {
- if (count < rsize)
- rsize = count;
-
- result = server->ops->read(dentry->d_inode,offset,rsize,buffer);
- if (result < 0)
- goto io_error;
-
- count -= result;
- offset += result;
- buffer += result;
- dentry->d_inode->i_atime =
- current_fs_time(dentry->d_inode->i_sb);
- if (result < rsize)
- break;
- } while (count);
-
- memset(buffer, 0, count);
- flush_dcache_page(page);
- SetPageUptodate(page);
- result = 0;
-
-io_error:
- kunmap(page);
- unlock_page(page);
- return result;
-}
-
-/*
- * We are called with the page locked and we unlock it when done.
- */
-static int
-smb_readpage(struct file *file, struct page *page)
-{
- int error;
- struct dentry *dentry = file->f_path.dentry;
-
- page_cache_get(page);
- error = smb_readpage_sync(dentry, page);
- page_cache_release(page);
- return error;
-}
-
-/*
- * Write a page synchronously.
- * Offset is the data offset within the page.
- */
-static int
-smb_writepage_sync(struct inode *inode, struct page *page,
- unsigned long pageoffset, unsigned int count)
-{
- loff_t offset;
- char *buffer = kmap(page) + pageoffset;
- struct smb_sb_info *server = server_from_inode(inode);
- unsigned int wsize = smb_get_wsize(server);
- int ret = 0;
-
- offset = ((loff_t)page->index << PAGE_CACHE_SHIFT) + pageoffset;
- VERBOSE("file ino=%ld, fileid=%d, count=%d@%Ld, wsize=%d\n",
- inode->i_ino, SMB_I(inode)->fileid, count, offset, wsize);
-
- do {
- int write_ret;
-
- if (count < wsize)
- wsize = count;
-
- write_ret = server->ops->write(inode, offset, wsize, buffer);
- if (write_ret < 0) {
- PARANOIA("failed write, wsize=%d, write_ret=%d\n",
- wsize, write_ret);
- ret = write_ret;
- break;
- }
- /* N.B. what if result < wsize?? */
-#ifdef SMBFS_PARANOIA
- if (write_ret < wsize)
- PARANOIA("short write, wsize=%d, write_ret=%d\n",
- wsize, write_ret);
-#endif
- buffer += wsize;
- offset += wsize;
- count -= wsize;
- /*
- * Update the inode now rather than waiting for a refresh.
- */
- inode->i_mtime = inode->i_atime = current_fs_time(inode->i_sb);
- SMB_I(inode)->flags |= SMB_F_LOCALWRITE;
- if (offset > inode->i_size)
- inode->i_size = offset;
- } while (count);
-
- kunmap(page);
- return ret;
-}
-
-/*
- * Write a page to the server. This will be used for NFS swapping only
- * (for now), and we currently do this synchronously only.
- *
- * We are called with the page locked and we unlock it when done.
- */
-static int
-smb_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode;
- unsigned long end_index;
- unsigned offset = PAGE_CACHE_SIZE;
- int err;
-
- BUG_ON(!mapping);
- inode = mapping->host;
- BUG_ON(!inode);
-
- end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-
- /* easy case */
- if (page->index < end_index)
- goto do_it;
- /* things got complicated... */
- offset = inode->i_size & (PAGE_CACHE_SIZE-1);
- /* OK, are we completely out? */
- if (page->index >= end_index+1 || !offset)
- return 0; /* truncated - don't care */
-do_it:
- page_cache_get(page);
- err = smb_writepage_sync(inode, page, 0, offset);
- SetPageUptodate(page);
- unlock_page(page);
- page_cache_release(page);
- return err;
-}
-
-static int
-smb_updatepage(struct file *file, struct page *page, unsigned long offset,
- unsigned int count)
-{
- struct dentry *dentry = file->f_path.dentry;
-
- DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
- ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
-
- return smb_writepage_sync(dentry->d_inode, page, offset, count);
-}
-
-static ssize_t
-smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct file * file = iocb->ki_filp;
- struct dentry * dentry = file->f_path.dentry;
- ssize_t status;
-
- VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry),
- (unsigned long) iocb->ki_left, (unsigned long) pos);
-
- status = smb_revalidate_inode(dentry);
- if (status) {
- PARANOIA("%s/%s validation failed, error=%Zd\n",
- DENTRY_PATH(dentry), status);
- goto out;
- }
-
- VERBOSE("before read, size=%ld, flags=%x, atime=%ld\n",
- (long)dentry->d_inode->i_size,
- dentry->d_inode->i_flags, dentry->d_inode->i_atime.tv_sec);
-
- status = generic_file_aio_read(iocb, iov, nr_segs, pos);
-out:
- return status;
-}
-
-static int
-smb_file_mmap(struct file * file, struct vm_area_struct * vma)
-{
- struct dentry * dentry = file->f_path.dentry;
- int status;
-
- VERBOSE("file %s/%s, address %lu - %lu\n",
- DENTRY_PATH(dentry), vma->vm_start, vma->vm_end);
-
- status = smb_revalidate_inode(dentry);
- if (status) {
- PARANOIA("%s/%s validation failed, error=%d\n",
- DENTRY_PATH(dentry), status);
- goto out;
- }
- status = generic_file_mmap(file, vma);
-out:
- return status;
-}
-
-static ssize_t
-smb_file_splice_read(struct file *file, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t count,
- unsigned int flags)
-{
- struct dentry *dentry = file->f_path.dentry;
- ssize_t status;
-
- VERBOSE("file %s/%s, pos=%Ld, count=%lu\n",
- DENTRY_PATH(dentry), *ppos, count);
-
- status = smb_revalidate_inode(dentry);
- if (status) {
- PARANOIA("%s/%s validation failed, error=%Zd\n",
- DENTRY_PATH(dentry), status);
- goto out;
- }
- status = generic_file_splice_read(file, ppos, pipe, count, flags);
-out:
- return status;
-}
-
-/*
- * This does the "real" work of the write. The generic routine has
- * allocated the page, locked it, done all the page alignment stuff
- * calculations etc. Now we should just copy the data from user
- * space and write it back to the real medium..
- *
- * If the writer ends up delaying the write, the writer needs to
- * increment the page use counts until he is done with the page.
- */
-static int smb_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- *pagep = grab_cache_page_write_begin(mapping, index, flags);
- if (!*pagep)
- return -ENOMEM;
- return 0;
-}
-
-static int smb_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- int status;
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-
- lock_kernel();
- status = smb_updatepage(file, page, offset, copied);
- unlock_kernel();
-
- if (!status) {
- if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
- SetPageUptodate(page);
- status = copied;
- }
-
- unlock_page(page);
- page_cache_release(page);
-
- return status;
-}
-
-const struct address_space_operations smb_file_aops = {
- .readpage = smb_readpage,
- .writepage = smb_writepage,
- .write_begin = smb_write_begin,
- .write_end = smb_write_end,
-};
-
-/*
- * Write to a file (through the page cache).
- */
-static ssize_t
-smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct file * file = iocb->ki_filp;
- struct dentry * dentry = file->f_path.dentry;
- ssize_t result;
-
- VERBOSE("file %s/%s, count=%lu@%lu\n",
- DENTRY_PATH(dentry),
- (unsigned long) iocb->ki_left, (unsigned long) pos);
-
- result = smb_revalidate_inode(dentry);
- if (result) {
- PARANOIA("%s/%s validation failed, error=%Zd\n",
- DENTRY_PATH(dentry), result);
- goto out;
- }
-
- result = smb_open(dentry, SMB_O_WRONLY);
- if (result)
- goto out;
-
- if (iocb->ki_left > 0) {
- result = generic_file_aio_write(iocb, iov, nr_segs, pos);
- VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n",
- (long) file->f_pos, (long) dentry->d_inode->i_size,
- dentry->d_inode->i_mtime.tv_sec,
- dentry->d_inode->i_atime.tv_sec);
- }
-out:
- return result;
-}
-
-static int
-smb_file_open(struct inode *inode, struct file * file)
-{
- int result;
- struct dentry *dentry = file->f_path.dentry;
- int smb_mode = (file->f_mode & O_ACCMODE) - 1;
-
- lock_kernel();
- result = smb_open(dentry, smb_mode);
- if (result)
- goto out;
- SMB_I(inode)->openers++;
-out:
- unlock_kernel();
- return result;
-}
-
-static int
-smb_file_release(struct inode *inode, struct file * file)
-{
- lock_kernel();
- if (!--SMB_I(inode)->openers) {
- /* We must flush any dirty pages now as we won't be able to
- write anything after close. mmap can trigger this.
- "openers" should perhaps include mmap'ers ... */
- filemap_write_and_wait(inode->i_mapping);
- smb_close(inode);
- }
- unlock_kernel();
- return 0;
-}
-
-/*
- * Check whether the required access is compatible with
- * an inode's permission. SMB doesn't recognize superuser
- * privileges, so we need our own check for this.
- */
-static int
-smb_file_permission(struct inode *inode, int mask)
-{
- int mode = inode->i_mode;
- int error = 0;
-
- VERBOSE("mode=%x, mask=%x\n", mode, mask);
-
- /* Look at user permissions */
- mode >>= 6;
- if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
- error = -EACCES;
- return error;
-}
-
-static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin)
-{
- loff_t ret;
- lock_kernel();
- ret = generic_file_llseek_unlocked(file, offset, origin);
- unlock_kernel();
- return ret;
-}
-
-const struct file_operations smb_file_operations =
-{
- .llseek = smb_remote_llseek,
- .read = do_sync_read,
- .aio_read = smb_file_aio_read,
- .write = do_sync_write,
- .aio_write = smb_file_aio_write,
- .unlocked_ioctl = smb_ioctl,
- .mmap = smb_file_mmap,
- .open = smb_file_open,
- .release = smb_file_release,
- .fsync = smb_fsync,
- .splice_read = smb_file_splice_read,
-};
-
-const struct inode_operations smb_file_inode_operations =
-{
- .permission = smb_file_permission,
- .getattr = smb_getattr,
- .setattr = smb_notify_change,
-};
diff --git a/fs/smbfs/getopt.c b/fs/smbfs/getopt.c
deleted file mode 100644
index 7ae0f5273ab1..000000000000
--- a/fs/smbfs/getopt.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * getopt.c
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/net.h>
-
-#include "getopt.h"
-
-/**
- * smb_getopt - option parser
- * @caller: name of the caller, for error messages
- * @options: the options string
- * @opts: an array of &struct option entries controlling parser operations
- * @optopt: output; will contain the current option
- * @optarg: output; will contain the value (if one exists)
- * @flag: output; may be NULL; should point to a long for or'ing flags
- * @value: output; may be NULL; will be overwritten with the integer value
- * of the current argument.
- *
- * Helper to parse options on the format used by mount ("a=b,c=d,e,f").
- * Returns opts->val if a matching entry in the 'opts' array is found,
- * 0 when no more tokens are found, -1 if an error is encountered.
- */
-int smb_getopt(char *caller, char **options, struct option *opts,
- char **optopt, char **optarg, unsigned long *flag,
- unsigned long *value)
-{
- char *token;
- char *val;
- int i;
-
- do {
- if ((token = strsep(options, ",")) == NULL)
- return 0;
- } while (*token == '\0');
- *optopt = token;
-
- *optarg = NULL;
- if ((val = strchr (token, '=')) != NULL) {
- *val++ = 0;
- if (value)
- *value = simple_strtoul(val, NULL, 0);
- *optarg = val;
- }
-
- for (i = 0; opts[i].name != NULL; i++) {
- if (!strcmp(opts[i].name, token)) {
- if (!opts[i].flag && (!val || !*val)) {
- printk("%s: the %s option requires an argument\n",
- caller, token);
- return -1;
- }
-
- if (flag && opts[i].flag)
- *flag |= opts[i].flag;
-
- return opts[i].val;
- }
- }
- printk("%s: Unrecognized mount option %s\n", caller, token);
- return -1;
-}
diff --git a/fs/smbfs/getopt.h b/fs/smbfs/getopt.h
deleted file mode 100644
index 146219ac7c46..000000000000
--- a/fs/smbfs/getopt.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _LINUX_GETOPT_H
-#define _LINUX_GETOPT_H
-
-struct option {
- const char *name;
- unsigned long flag;
- int val;
-};
-
-extern int smb_getopt(char *caller, char **options, struct option *opts,
- char **optopt, char **optarg, unsigned long *flag,
- unsigned long *value);
-
-#endif /* _LINUX_GETOPT_H */
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
deleted file mode 100644
index f6e9ee59757e..000000000000
--- a/fs/smbfs/inode.c
+++ /dev/null
@@ -1,843 +0,0 @@
-/*
- * inode.c
- *
- * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/file.h>
-#include <linux/dcache.h>
-#include <linux/smp_lock.h>
-#include <linux/nls.h>
-#include <linux/seq_file.h>
-#include <linux/mount.h>
-#include <linux/net.h>
-#include <linux/vfs.h>
-#include <linux/highuid.h>
-#include <linux/sched.h>
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include "smb_debug.h"
-#include "getopt.h"
-#include "proto.h"
-
-/* Always pick a default string */
-#ifdef CONFIG_SMB_NLS_REMOTE
-#define SMB_NLS_REMOTE CONFIG_SMB_NLS_REMOTE
-#else
-#define SMB_NLS_REMOTE ""
-#endif
-
-#define SMB_TTL_DEFAULT 1000
-
-static void smb_evict_inode(struct inode *);
-static void smb_put_super(struct super_block *);
-static int smb_statfs(struct dentry *, struct kstatfs *);
-static int smb_show_options(struct seq_file *, struct vfsmount *);
-
-static struct kmem_cache *smb_inode_cachep;
-
-static struct inode *smb_alloc_inode(struct super_block *sb)
-{
- struct smb_inode_info *ei;
- ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL);
- if (!ei)
- return NULL;
- return &ei->vfs_inode;
-}
-
-static void smb_destroy_inode(struct inode *inode)
-{
- kmem_cache_free(smb_inode_cachep, SMB_I(inode));
-}
-
-static void init_once(void *foo)
-{
- struct smb_inode_info *ei = (struct smb_inode_info *) foo;
-
- inode_init_once(&ei->vfs_inode);
-}
-
-static int init_inodecache(void)
-{
- smb_inode_cachep = kmem_cache_create("smb_inode_cache",
- sizeof(struct smb_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
- init_once);
- if (smb_inode_cachep == NULL)
- return -ENOMEM;
- return 0;
-}
-
-static void destroy_inodecache(void)
-{
- kmem_cache_destroy(smb_inode_cachep);
-}
-
-static int smb_remount(struct super_block *sb, int *flags, char *data)
-{
- *flags |= MS_NODIRATIME;
- return 0;
-}
-
-static const struct super_operations smb_sops =
-{
- .alloc_inode = smb_alloc_inode,
- .destroy_inode = smb_destroy_inode,
- .drop_inode = generic_delete_inode,
- .evict_inode = smb_evict_inode,
- .put_super = smb_put_super,
- .statfs = smb_statfs,
- .show_options = smb_show_options,
- .remount_fs = smb_remount,
-};
-
-
-/* We are always generating a new inode here */
-struct inode *
-smb_iget(struct super_block *sb, struct smb_fattr *fattr)
-{
- struct smb_sb_info *server = SMB_SB(sb);
- struct inode *result;
-
- DEBUG1("smb_iget: %p\n", fattr);
-
- result = new_inode(sb);
- if (!result)
- return result;
- result->i_ino = fattr->f_ino;
- SMB_I(result)->open = 0;
- SMB_I(result)->fileid = 0;
- SMB_I(result)->access = 0;
- SMB_I(result)->flags = 0;
- SMB_I(result)->closed = 0;
- SMB_I(result)->openers = 0;
- smb_set_inode_attr(result, fattr);
- if (S_ISREG(result->i_mode)) {
- result->i_op = &smb_file_inode_operations;
- result->i_fop = &smb_file_operations;
- result->i_data.a_ops = &smb_file_aops;
- } else if (S_ISDIR(result->i_mode)) {
- if (server->opt.capabilities & SMB_CAP_UNIX)
- result->i_op = &smb_dir_inode_operations_unix;
- else
- result->i_op = &smb_dir_inode_operations;
- result->i_fop = &smb_dir_operations;
- } else if (S_ISLNK(result->i_mode)) {
- result->i_op = &smb_link_inode_operations;
- } else {
- init_special_inode(result, result->i_mode, fattr->f_rdev);
- }
- insert_inode_hash(result);
- return result;
-}
-
-/*
- * Copy the inode data to a smb_fattr structure.
- */
-void
-smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
-{
- memset(fattr, 0, sizeof(struct smb_fattr));
- fattr->f_mode = inode->i_mode;
- fattr->f_nlink = inode->i_nlink;
- fattr->f_ino = inode->i_ino;
- fattr->f_uid = inode->i_uid;
- fattr->f_gid = inode->i_gid;
- fattr->f_size = inode->i_size;
- fattr->f_mtime = inode->i_mtime;
- fattr->f_ctime = inode->i_ctime;
- fattr->f_atime = inode->i_atime;
- fattr->f_blocks = inode->i_blocks;
-
- fattr->attr = SMB_I(inode)->attr;
- /*
- * Keep the attributes in sync with the inode permissions.
- */
- if (fattr->f_mode & S_IWUSR)
- fattr->attr &= ~aRONLY;
- else
- fattr->attr |= aRONLY;
-}
-
-/*
- * Update the inode, possibly causing it to invalidate its pages if mtime/size
- * is different from last time.
- */
-void
-smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
-{
- struct smb_inode_info *ei = SMB_I(inode);
-
- /*
- * A size change should have a different mtime, or same mtime
- * but different size.
- */
- time_t last_time = inode->i_mtime.tv_sec;
- loff_t last_sz = inode->i_size;
-
- inode->i_mode = fattr->f_mode;
- inode->i_nlink = fattr->f_nlink;
- inode->i_uid = fattr->f_uid;
- inode->i_gid = fattr->f_gid;
- inode->i_ctime = fattr->f_ctime;
- inode->i_blocks = fattr->f_blocks;
- inode->i_size = fattr->f_size;
- inode->i_mtime = fattr->f_mtime;
- inode->i_atime = fattr->f_atime;
- ei->attr = fattr->attr;
-
- /*
- * Update the "last time refreshed" field for revalidation.
- */
- ei->oldmtime = jiffies;
-
- if (inode->i_mtime.tv_sec != last_time || inode->i_size != last_sz) {
- VERBOSE("%ld changed, old=%ld, new=%ld, oz=%ld, nz=%ld\n",
- inode->i_ino,
- (long) last_time, (long) inode->i_mtime.tv_sec,
- (long) last_sz, (long) inode->i_size);
-
- if (!S_ISDIR(inode->i_mode))
- invalidate_remote_inode(inode);
- }
-}
-
-/*
- * This is called if the connection has gone bad ...
- * try to kill off all the current inodes.
- */
-void
-smb_invalidate_inodes(struct smb_sb_info *server)
-{
- VERBOSE("\n");
- shrink_dcache_sb(SB_of(server));
-}
-
-/*
- * This is called to update the inode attributes after
- * we've made changes to a file or directory.
- */
-static int
-smb_refresh_inode(struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
- struct smb_fattr fattr;
-
- error = smb_proc_getattr(dentry, &fattr);
- if (!error) {
- smb_renew_times(dentry);
- /*
- * Check whether the type part of the mode changed,
- * and don't update the attributes if it did.
- *
- * And don't dick with the root inode
- */
- if (inode->i_ino == 2)
- return error;
- if (S_ISLNK(inode->i_mode))
- return error; /* VFS will deal with it */
-
- if ((inode->i_mode & S_IFMT) == (fattr.f_mode & S_IFMT)) {
- smb_set_inode_attr(inode, &fattr);
- } else {
- /*
- * Big trouble! The inode has become a new object,
- * so any operations attempted on it are invalid.
- *
- * To limit damage, mark the inode as bad so that
- * subsequent lookup validations will fail.
- */
- PARANOIA("%s/%s changed mode, %07o to %07o\n",
- DENTRY_PATH(dentry),
- inode->i_mode, fattr.f_mode);
-
- fattr.f_mode = inode->i_mode; /* save mode */
- make_bad_inode(inode);
- inode->i_mode = fattr.f_mode; /* restore mode */
- /*
- * No need to worry about unhashing the dentry: the
- * lookup validation will see that the inode is bad.
- * But we do want to invalidate the caches ...
- */
- if (!S_ISDIR(inode->i_mode))
- invalidate_remote_inode(inode);
- else
- smb_invalid_dir_cache(inode);
- error = -EIO;
- }
- }
- return error;
-}
-
-/*
- * This is called when we want to check whether the inode
- * has changed on the server. If it has changed, we must
- * invalidate our local caches.
- */
-int
-smb_revalidate_inode(struct dentry *dentry)
-{
- struct smb_sb_info *s = server_from_dentry(dentry);
- struct inode *inode = dentry->d_inode;
- int error = 0;
-
- DEBUG1("smb_revalidate_inode\n");
- lock_kernel();
-
- /*
- * Check whether we've recently refreshed the inode.
- */
- if (time_before(jiffies, SMB_I(inode)->oldmtime + SMB_MAX_AGE(s))) {
- VERBOSE("up-to-date, ino=%ld, jiffies=%lu, oldtime=%lu\n",
- inode->i_ino, jiffies, SMB_I(inode)->oldmtime);
- goto out;
- }
-
- error = smb_refresh_inode(dentry);
-out:
- unlock_kernel();
- return error;
-}
-
-/*
- * This routine is called when i_nlink == 0 and i_count goes to 0.
- * All blocking cleanup operations need to go here to avoid races.
- */
-static void
-smb_evict_inode(struct inode *ino)
-{
- DEBUG1("ino=%ld\n", ino->i_ino);
- truncate_inode_pages(&ino->i_data, 0);
- end_writeback(ino);
- lock_kernel();
- if (smb_close(ino))
- PARANOIA("could not close inode %ld\n", ino->i_ino);
- unlock_kernel();
-}
-
-static struct option opts[] = {
- { "version", 0, 'v' },
- { "win95", SMB_MOUNT_WIN95, 1 },
- { "oldattr", SMB_MOUNT_OLDATTR, 1 },
- { "dirattr", SMB_MOUNT_DIRATTR, 1 },
- { "case", SMB_MOUNT_CASE, 1 },
- { "uid", 0, 'u' },
- { "gid", 0, 'g' },
- { "file_mode", 0, 'f' },
- { "dir_mode", 0, 'd' },
- { "iocharset", 0, 'i' },
- { "codepage", 0, 'c' },
- { "ttl", 0, 't' },
- { NULL, 0, 0}
-};
-
-static int
-parse_options(struct smb_mount_data_kernel *mnt, char *options)
-{
- int c;
- unsigned long flags;
- unsigned long value;
- char *optarg;
- char *optopt;
-
- flags = 0;
- while ( (c = smb_getopt("smbfs", &options, opts,
- &optopt, &optarg, &flags, &value)) > 0) {
-
- VERBOSE("'%s' -> '%s'\n", optopt, optarg ? optarg : "<none>");
- switch (c) {
- case 1:
- /* got a "flag" option */
- break;
- case 'v':
- if (value != SMB_MOUNT_VERSION) {
- printk ("smbfs: Bad mount version %ld, expected %d\n",
- value, SMB_MOUNT_VERSION);
- return 0;
- }
- mnt->version = value;
- break;
- case 'u':
- mnt->uid = value;
- flags |= SMB_MOUNT_UID;
- break;
- case 'g':
- mnt->gid = value;
- flags |= SMB_MOUNT_GID;
- break;
- case 'f':
- mnt->file_mode = (value & S_IRWXUGO) | S_IFREG;
- flags |= SMB_MOUNT_FMODE;
- break;
- case 'd':
- mnt->dir_mode = (value & S_IRWXUGO) | S_IFDIR;
- flags |= SMB_MOUNT_DMODE;
- break;
- case 'i':
- strlcpy(mnt->codepage.local_name, optarg,
- SMB_NLS_MAXNAMELEN);
- break;
- case 'c':
- strlcpy(mnt->codepage.remote_name, optarg,
- SMB_NLS_MAXNAMELEN);
- break;
- case 't':
- mnt->ttl = value;
- break;
- default:
- printk ("smbfs: Unrecognized mount option %s\n",
- optopt);
- return -1;
- }
- }
- mnt->flags = flags;
- return c;
-}
-
-/*
- * smb_show_options() is for displaying mount options in /proc/mounts.
- * It tries to avoid showing settings that were not changed from their
- * defaults.
- */
-static int
-smb_show_options(struct seq_file *s, struct vfsmount *m)
-{
- struct smb_mount_data_kernel *mnt = SMB_SB(m->mnt_sb)->mnt;
- int i;
-
- for (i = 0; opts[i].name != NULL; i++)
- if (mnt->flags & opts[i].flag)
- seq_printf(s, ",%s", opts[i].name);
-
- if (mnt->flags & SMB_MOUNT_UID)
- seq_printf(s, ",uid=%d", mnt->uid);
- if (mnt->flags & SMB_MOUNT_GID)
- seq_printf(s, ",gid=%d", mnt->gid);
- if (mnt->mounted_uid != 0)
- seq_printf(s, ",mounted_uid=%d", mnt->mounted_uid);
-
- /*
- * Defaults for file_mode and dir_mode are unknown to us; they
- * depend on the current umask of the user doing the mount.
- */
- if (mnt->flags & SMB_MOUNT_FMODE)
- seq_printf(s, ",file_mode=%04o", mnt->file_mode & S_IRWXUGO);
- if (mnt->flags & SMB_MOUNT_DMODE)
- seq_printf(s, ",dir_mode=%04o", mnt->dir_mode & S_IRWXUGO);
-
- if (strcmp(mnt->codepage.local_name, CONFIG_NLS_DEFAULT))
- seq_printf(s, ",iocharset=%s", mnt->codepage.local_name);
- if (strcmp(mnt->codepage.remote_name, SMB_NLS_REMOTE))
- seq_printf(s, ",codepage=%s", mnt->codepage.remote_name);
-
- if (mnt->ttl != SMB_TTL_DEFAULT)
- seq_printf(s, ",ttl=%d", mnt->ttl);
-
- return 0;
-}
-
-static void
-smb_unload_nls(struct smb_sb_info *server)
-{
- unload_nls(server->remote_nls);
- unload_nls(server->local_nls);
-}
-
-static void
-smb_put_super(struct super_block *sb)
-{
- struct smb_sb_info *server = SMB_SB(sb);
-
- lock_kernel();
-
- smb_lock_server(server);
- server->state = CONN_INVALID;
- smbiod_unregister_server(server);
-
- smb_close_socket(server);
-
- if (server->conn_pid)
- kill_pid(server->conn_pid, SIGTERM, 1);
-
- bdi_destroy(&server->bdi);
- kfree(server->ops);
- smb_unload_nls(server);
- sb->s_fs_info = NULL;
- smb_unlock_server(server);
- put_pid(server->conn_pid);
- kfree(server);
-
- unlock_kernel();
-}
-
-static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
-{
- struct smb_sb_info *server;
- struct smb_mount_data_kernel *mnt;
- struct smb_mount_data *oldmnt;
- struct inode *root_inode;
- struct smb_fattr root;
- int ver;
- void *mem;
- static int warn_count;
-
- lock_kernel();
-
- if (warn_count < 5) {
- warn_count++;
- printk(KERN_EMERG "smbfs is deprecated and will be removed"
- " from the 2.6.27 kernel. Please migrate to cifs\n");
- }
-
- if (!raw_data)
- goto out_no_data;
-
- oldmnt = (struct smb_mount_data *) raw_data;
- ver = oldmnt->version;
- if (ver != SMB_MOUNT_OLDVERSION && cpu_to_be32(ver) != SMB_MOUNT_ASCII)
- goto out_wrong_data;
-
- sb->s_flags |= MS_NODIRATIME;
- sb->s_blocksize = 1024; /* Eh... Is this correct? */
- sb->s_blocksize_bits = 10;
- sb->s_magic = SMB_SUPER_MAGIC;
- sb->s_op = &smb_sops;
- sb->s_time_gran = 100;
-
- server = kzalloc(sizeof(struct smb_sb_info), GFP_KERNEL);
- if (!server)
- goto out_no_server;
- sb->s_fs_info = server;
-
- if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
- goto out_bdi;
-
- sb->s_bdi = &server->bdi;
-
- server->super_block = sb;
- server->mnt = NULL;
- server->sock_file = NULL;
- init_waitqueue_head(&server->conn_wq);
- init_MUTEX(&server->sem);
- INIT_LIST_HEAD(&server->entry);
- INIT_LIST_HEAD(&server->xmitq);
- INIT_LIST_HEAD(&server->recvq);
- server->conn_error = 0;
- server->conn_pid = NULL;
- server->state = CONN_INVALID; /* no connection yet */
- server->generation = 0;
-
- /* Allocate the global temp buffer and some superblock helper structs */
- /* FIXME: move these to the smb_sb_info struct */
- VERBOSE("alloc chunk = %lu\n", sizeof(struct smb_ops) +
- sizeof(struct smb_mount_data_kernel));
- mem = kmalloc(sizeof(struct smb_ops) +
- sizeof(struct smb_mount_data_kernel), GFP_KERNEL);
- if (!mem)
- goto out_no_mem;
-
- server->ops = mem;
- smb_install_null_ops(server->ops);
- server->mnt = mem + sizeof(struct smb_ops);
-
- /* Setup NLS stuff */
- server->remote_nls = NULL;
- server->local_nls = NULL;
-
- mnt = server->mnt;
-
- memset(mnt, 0, sizeof(struct smb_mount_data_kernel));
- strlcpy(mnt->codepage.local_name, CONFIG_NLS_DEFAULT,
- SMB_NLS_MAXNAMELEN);
- strlcpy(mnt->codepage.remote_name, SMB_NLS_REMOTE,
- SMB_NLS_MAXNAMELEN);
-
- mnt->ttl = SMB_TTL_DEFAULT;
- if (ver == SMB_MOUNT_OLDVERSION) {
- mnt->version = oldmnt->version;
-
- SET_UID(mnt->uid, oldmnt->uid);
- SET_GID(mnt->gid, oldmnt->gid);
-
- mnt->file_mode = (oldmnt->file_mode & S_IRWXUGO) | S_IFREG;
- mnt->dir_mode = (oldmnt->dir_mode & S_IRWXUGO) | S_IFDIR;
-
- mnt->flags = (oldmnt->file_mode >> 9) | SMB_MOUNT_UID |
- SMB_MOUNT_GID | SMB_MOUNT_FMODE | SMB_MOUNT_DMODE;
- } else {
- mnt->file_mode = S_IRWXU | S_IRGRP | S_IXGRP |
- S_IROTH | S_IXOTH | S_IFREG;
- mnt->dir_mode = S_IRWXU | S_IRGRP | S_IXGRP |
- S_IROTH | S_IXOTH | S_IFDIR;
- if (parse_options(mnt, raw_data))
- goto out_bad_option;
- }
- mnt->mounted_uid = current_uid();
- smb_setcodepage(server, &mnt->codepage);
-
- /*
- * Display the enabled options
- * Note: smb_proc_getattr uses these in 2.4 (but was changed in 2.2)
- */
- if (mnt->flags & SMB_MOUNT_OLDATTR)
- printk("SMBFS: Using core getattr (Win 95 speedup)\n");
- else if (mnt->flags & SMB_MOUNT_DIRATTR)
- printk("SMBFS: Using dir ff getattr\n");
-
- if (smbiod_register_server(server) < 0) {
- printk(KERN_ERR "smbfs: failed to start smbiod\n");
- goto out_no_smbiod;
- }
-
- /*
- * Keep the super block locked while we get the root inode.
- */
- smb_init_root_dirent(server, &root, sb);
- root_inode = smb_iget(sb, &root);
- if (!root_inode)
- goto out_no_root;
-
- sb->s_root = d_alloc_root(root_inode);
- if (!sb->s_root)
- goto out_no_root;
-
- smb_new_dentry(sb->s_root);
-
- unlock_kernel();
- return 0;
-
-out_no_root:
- iput(root_inode);
-out_no_smbiod:
- smb_unload_nls(server);
-out_bad_option:
- kfree(mem);
-out_no_mem:
- bdi_destroy(&server->bdi);
-out_bdi:
- if (!server->mnt)
- printk(KERN_ERR "smb_fill_super: allocation failure\n");
- sb->s_fs_info = NULL;
- kfree(server);
- goto out_fail;
-out_wrong_data:
- printk(KERN_ERR "smbfs: mount_data version %d is not supported\n", ver);
- goto out_fail;
-out_no_data:
- printk(KERN_ERR "smb_fill_super: missing data argument\n");
-out_fail:
- unlock_kernel();
- return -EINVAL;
-out_no_server:
- printk(KERN_ERR "smb_fill_super: cannot allocate struct smb_sb_info\n");
- unlock_kernel();
- return -ENOMEM;
-}
-
-static int
-smb_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
- int result;
-
- lock_kernel();
-
- result = smb_proc_dskattr(dentry, buf);
-
- unlock_kernel();
-
- buf->f_type = SMB_SUPER_MAGIC;
- buf->f_namelen = SMB_MAXPATHLEN;
- return result;
-}
-
-int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
-{
- int err = smb_revalidate_inode(dentry);
- if (!err)
- generic_fillattr(dentry->d_inode, stat);
- return err;
-}
-
-int
-smb_notify_change(struct dentry *dentry, struct iattr *attr)
-{
- struct inode *inode = dentry->d_inode;
- struct smb_sb_info *server = server_from_dentry(dentry);
- unsigned int mask = (S_IFREG | S_IFDIR | S_IRWXUGO);
- int error, changed, refresh = 0;
- struct smb_fattr fattr;
-
- lock_kernel();
-
- error = smb_revalidate_inode(dentry);
- if (error)
- goto out;
-
- if ((error = inode_change_ok(inode, attr)) < 0)
- goto out;
-
- error = -EPERM;
- if ((attr->ia_valid & ATTR_UID) && (attr->ia_uid != server->mnt->uid))
- goto out;
-
- if ((attr->ia_valid & ATTR_GID) && (attr->ia_uid != server->mnt->gid))
- goto out;
-
- if ((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & ~mask))
- goto out;
-
- if ((attr->ia_valid & ATTR_SIZE) != 0) {
- VERBOSE("changing %s/%s, old size=%ld, new size=%ld\n",
- DENTRY_PATH(dentry),
- (long) inode->i_size, (long) attr->ia_size);
-
- filemap_write_and_wait(inode->i_mapping);
-
- error = smb_open(dentry, O_WRONLY);
- if (error)
- goto out;
- error = server->ops->truncate(inode, attr->ia_size);
- if (error)
- goto out;
- truncate_setsize(inode, attr->ia_size);
- refresh = 1;
- }
-
- if (server->opt.capabilities & SMB_CAP_UNIX) {
- /* For now we don't want to set the size with setattr_unix */
- attr->ia_valid &= ~ATTR_SIZE;
- /* FIXME: only call if we actually want to set something? */
- error = smb_proc_setattr_unix(dentry, attr, 0, 0);
- if (!error)
- refresh = 1;
-
- goto out;
- }
-
- /*
- * Initialize the fattr and check for changed fields.
- * Note: CTIME under SMB is creation time rather than
- * change time, so we don't attempt to change it.
- */
- smb_get_inode_attr(inode, &fattr);
-
- changed = 0;
- if ((attr->ia_valid & ATTR_MTIME) != 0) {
- fattr.f_mtime = attr->ia_mtime;
- changed = 1;
- }
- if ((attr->ia_valid & ATTR_ATIME) != 0) {
- fattr.f_atime = attr->ia_atime;
- /* Earlier protocols don't have an access time */
- if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2)
- changed = 1;
- }
- if (changed) {
- error = smb_proc_settime(dentry, &fattr);
- if (error)
- goto out;
- refresh = 1;
- }
-
- /*
- * Check for mode changes ... we're extremely limited in
- * what can be set for SMB servers: just the read-only bit.
- */
- if ((attr->ia_valid & ATTR_MODE) != 0) {
- VERBOSE("%s/%s mode change, old=%x, new=%x\n",
- DENTRY_PATH(dentry), fattr.f_mode, attr->ia_mode);
- changed = 0;
- if (attr->ia_mode & S_IWUSR) {
- if (fattr.attr & aRONLY) {
- fattr.attr &= ~aRONLY;
- changed = 1;
- }
- } else {
- if (!(fattr.attr & aRONLY)) {
- fattr.attr |= aRONLY;
- changed = 1;
- }
- }
- if (changed) {
- error = smb_proc_setattr(dentry, &fattr);
- if (error)
- goto out;
- refresh = 1;
- }
- }
- error = 0;
-
-out:
- if (refresh)
- smb_refresh_inode(dentry);
- unlock_kernel();
- return error;
-}
-
-static int smb_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-{
- return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt);
-}
-
-static struct file_system_type smb_fs_type = {
- .owner = THIS_MODULE,
- .name = "smbfs",
- .get_sb = smb_get_sb,
- .kill_sb = kill_anon_super,
- .fs_flags = FS_BINARY_MOUNTDATA,
-};
-
-static int __init init_smb_fs(void)
-{
- int err;
- DEBUG1("registering ...\n");
-
- err = init_inodecache();
- if (err)
- goto out_inode;
- err = smb_init_request_cache();
- if (err)
- goto out_request;
- err = register_filesystem(&smb_fs_type);
- if (err)
- goto out;
- return 0;
-out:
- smb_destroy_request_cache();
-out_request:
- destroy_inodecache();
-out_inode:
- return err;
-}
-
-static void __exit exit_smb_fs(void)
-{
- DEBUG1("unregistering ...\n");
- unregister_filesystem(&smb_fs_type);
- smb_destroy_request_cache();
- destroy_inodecache();
-}
-
-module_init(init_smb_fs)
-module_exit(exit_smb_fs)
-MODULE_LICENSE("GPL");
diff --git a/fs/smbfs/ioctl.c b/fs/smbfs/ioctl.c
deleted file mode 100644
index 07215312ad39..000000000000
--- a/fs/smbfs/ioctl.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * ioctl.c
- *
- * Copyright (C) 1995, 1996 by Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/ioctl.h>
-#include <linux/time.h>
-#include <linux/mm.h>
-#include <linux/highuid.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb_mount.h>
-
-#include <asm/uaccess.h>
-
-#include "proto.h"
-
-long
-smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
- struct smb_sb_info *server = server_from_inode(filp->f_path.dentry->d_inode);
- struct smb_conn_opt opt;
- int result = -EINVAL;
-
- lock_kernel();
- switch (cmd) {
- uid16_t uid16;
- uid_t uid32;
- case SMB_IOC_GETMOUNTUID:
- SET_UID(uid16, server->mnt->mounted_uid);
- result = put_user(uid16, (uid16_t __user *) arg);
- break;
- case SMB_IOC_GETMOUNTUID32:
- SET_UID(uid32, server->mnt->mounted_uid);
- result = put_user(uid32, (uid_t __user *) arg);
- break;
-
- case SMB_IOC_NEWCONN:
- /* arg is smb_conn_opt, or NULL if no connection was made */
- if (!arg) {
- result = 0;
- smb_lock_server(server);
- server->state = CONN_RETRIED;
- printk(KERN_ERR "Connection attempt failed! [%d]\n",
- server->conn_error);
- smbiod_flush(server);
- smb_unlock_server(server);
- break;
- }
-
- result = -EFAULT;
- if (!copy_from_user(&opt, (void __user *)arg, sizeof(opt)))
- result = smb_newconn(server, &opt);
- break;
- default:
- break;
- }
- unlock_kernel();
-
- return result;
-}
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
deleted file mode 100644
index 3dcf638d4d3a..000000000000
--- a/fs/smbfs/proc.c
+++ /dev/null
@@ -1,3503 +0,0 @@
-/*
- * proc.c
- *
- * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/types.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/dcache.h>
-#include <linux/nls.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-#include <linux/vfs.h>
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <net/sock.h>
-
-#include <asm/string.h>
-#include <asm/div64.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-#include "request.h"
-
-
-/* Features. Undefine if they cause problems, this should perhaps be a
- config option. */
-#define SMBFS_POSIX_UNLINK 1
-
-/* Allow smb_retry to be interrupted. */
-#define SMB_RETRY_INTR
-
-#define SMB_VWV(packet) ((packet) + SMB_HEADER_LEN)
-#define SMB_CMD(packet) (*(packet+8))
-#define SMB_WCT(packet) (*(packet+SMB_HEADER_LEN - 1))
-
-#define SMB_DIRINFO_SIZE 43
-#define SMB_STATUS_SIZE 21
-
-#define SMB_ST_BLKSIZE (PAGE_SIZE)
-#define SMB_ST_BLKSHIFT (PAGE_SHIFT)
-
-static struct smb_ops smb_ops_core;
-static struct smb_ops smb_ops_os2;
-static struct smb_ops smb_ops_win95;
-static struct smb_ops smb_ops_winNT;
-static struct smb_ops smb_ops_unix;
-static struct smb_ops smb_ops_null;
-
-static void
-smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
-static void
-smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
-static int
-smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *fattr);
-static int
-smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
- struct smb_fattr *fattr);
-static int
-smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
- u16 attr);
-static int
-smb_proc_setattr_ext(struct smb_sb_info *server,
- struct inode *inode, struct smb_fattr *fattr);
-static int
-smb_proc_query_cifsunix(struct smb_sb_info *server);
-static void
-install_ops(struct smb_ops *dst, struct smb_ops *src);
-
-
-static void
-str_upper(char *name, int len)
-{
- while (len--)
- {
- if (*name >= 'a' && *name <= 'z')
- *name -= ('a' - 'A');
- name++;
- }
-}
-
-#if 0
-static void
-str_lower(char *name, int len)
-{
- while (len--)
- {
- if (*name >= 'A' && *name <= 'Z')
- *name += ('a' - 'A');
- name++;
- }
-}
-#endif
-
-/* reverse a string inline. This is used by the dircache walking routines */
-static void reverse_string(char *buf, int len)
-{
- char c;
- char *end = buf+len-1;
-
- while(buf < end) {
- c = *buf;
- *(buf++) = *end;
- *(end--) = c;
- }
-}
-
-/* no conversion, just a wrapper for memcpy. */
-static int convert_memcpy(unsigned char *output, int olen,
- const unsigned char *input, int ilen,
- struct nls_table *nls_from,
- struct nls_table *nls_to)
-{
- if (olen < ilen)
- return -ENAMETOOLONG;
- memcpy(output, input, ilen);
- return ilen;
-}
-
-static inline int write_char(unsigned char ch, char *output, int olen)
-{
- if (olen < 4)
- return -ENAMETOOLONG;
- sprintf(output, ":x%02x", ch);
- return 4;
-}
-
-static inline int write_unichar(wchar_t ch, char *output, int olen)
-{
- if (olen < 5)
- return -ENAMETOOLONG;
- sprintf(output, ":%04x", ch);
- return 5;
-}
-
-/* convert from one "codepage" to another (possibly being utf8). */
-static int convert_cp(unsigned char *output, int olen,
- const unsigned char *input, int ilen,
- struct nls_table *nls_from,
- struct nls_table *nls_to)
-{
- int len = 0;
- int n;
- wchar_t ch;
-
- while (ilen > 0) {
- /* convert by changing to unicode and back to the new cp */
- n = nls_from->char2uni(input, ilen, &ch);
- if (n == -EINVAL) {
- ilen--;
- n = write_char(*input++, output, olen);
- if (n < 0)
- goto fail;
- output += n;
- olen -= n;
- len += n;
- continue;
- } else if (n < 0)
- goto fail;
- input += n;
- ilen -= n;
-
- n = nls_to->uni2char(ch, output, olen);
- if (n == -EINVAL)
- n = write_unichar(ch, output, olen);
- if (n < 0)
- goto fail;
- output += n;
- olen -= n;
-
- len += n;
- }
- return len;
-fail:
- return n;
-}
-
-/* ----------------------------------------------------------- */
-
-/*
- * nls_unicode
- *
- * This encodes/decodes little endian unicode format
- */
-
-static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
-{
- if (boundlen < 2)
- return -EINVAL;
- *out++ = uni & 0xff;
- *out++ = uni >> 8;
- return 2;
-}
-
-static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
-{
- if (boundlen < 2)
- return -EINVAL;
- *uni = (rawstring[1] << 8) | rawstring[0];
- return 2;
-}
-
-static struct nls_table unicode_table = {
- .charset = "unicode",
- .uni2char = uni2char,
- .char2uni = char2uni,
-};
-
-/* ----------------------------------------------------------- */
-
-static int setcodepage(struct nls_table **p, char *name)
-{
- struct nls_table *nls;
-
- if (!name || !*name) {
- nls = NULL;
- } else if ( (nls = load_nls(name)) == NULL) {
- printk (KERN_ERR "smbfs: failed to load nls '%s'\n", name);
- return -EINVAL;
- }
-
- /* if already set, unload the previous one. */
- if (*p && *p != &unicode_table)
- unload_nls(*p);
- *p = nls;
-
- return 0;
-}
-
-/* Handles all changes to codepage settings. */
-int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp)
-{
- int n = 0;
-
- smb_lock_server(server);
-
- /* Don't load any nls_* at all, if no remote is requested */
- if (!*cp->remote_name)
- goto out;
-
- /* local */
- n = setcodepage(&server->local_nls, cp->local_name);
- if (n != 0)
- goto out;
-
- /* remote */
- if (!strcmp(cp->remote_name, "unicode")) {
- server->remote_nls = &unicode_table;
- } else {
- n = setcodepage(&server->remote_nls, cp->remote_name);
- if (n != 0)
- setcodepage(&server->local_nls, NULL);
- }
-
-out:
- if (server->local_nls != NULL && server->remote_nls != NULL)
- server->ops->convert = convert_cp;
- else
- server->ops->convert = convert_memcpy;
-
- smb_unlock_server(server);
- return n;
-}
-
-
-/*****************************************************************************/
-/* */
-/* Encoding/Decoding section */
-/* */
-/*****************************************************************************/
-
-static __u8 *
-smb_encode_smb_length(__u8 * p, __u32 len)
-{
- *p = 0;
- *(p+1) = 0;
- *(p+2) = (len & 0xFF00) >> 8;
- *(p+3) = (len & 0xFF);
- if (len > 0xFFFF)
- {
- *(p+1) = 1;
- }
- return p + 4;
-}
-
-/*
- * smb_build_path: build the path to entry and name storing it in buf.
- * The path returned will have the trailing '\0'.
- */
-static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
- int maxlen,
- struct dentry *entry, struct qstr *name)
-{
- unsigned char *path = buf;
- int len;
- int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE) != 0;
-
- if (maxlen < (2<<unicode))
- return -ENAMETOOLONG;
-
- if (maxlen > SMB_MAXPATHLEN + 1)
- maxlen = SMB_MAXPATHLEN + 1;
-
- if (entry == NULL)
- goto test_name_and_out;
-
- /*
- * If IS_ROOT, we have to do no walking at all.
- */
- if (IS_ROOT(entry) && !name) {
- *path++ = '\\';
- if (unicode) *path++ = '\0';
- *path++ = '\0';
- if (unicode) *path++ = '\0';
- return path-buf;
- }
-
- /*
- * Build the path string walking the tree backward from end to ROOT
- * and store it in reversed order [see reverse_string()]
- */
- dget(entry);
- while (!IS_ROOT(entry)) {
- struct dentry *parent;
-
- if (maxlen < (3<<unicode)) {
- dput(entry);
- return -ENAMETOOLONG;
- }
-
- spin_lock(&entry->d_lock);
- len = server->ops->convert(path, maxlen-2,
- entry->d_name.name, entry->d_name.len,
- server->local_nls, server->remote_nls);
- if (len < 0) {
- spin_unlock(&entry->d_lock);
- dput(entry);
- return len;
- }
- reverse_string(path, len);
- path += len;
- if (unicode) {
- /* Note: reverse order */
- *path++ = '\0';
- maxlen--;
- }
- *path++ = '\\';
- maxlen -= len+1;
- spin_unlock(&entry->d_lock);
-
- parent = dget_parent(entry);
- dput(entry);
- entry = parent;
- }
- dput(entry);
- reverse_string(buf, path-buf);
-
- /* maxlen has space for at least one char */
-test_name_and_out:
- if (name) {
- if (maxlen < (3<<unicode))
- return -ENAMETOOLONG;
- *path++ = '\\';
- if (unicode) {
- *path++ = '\0';
- maxlen--;
- }
- len = server->ops->convert(path, maxlen-2,
- name->name, name->len,
- server->local_nls, server->remote_nls);
- if (len < 0)
- return len;
- path += len;
- maxlen -= len+1;
- }
- /* maxlen has space for at least one char */
- *path++ = '\0';
- if (unicode) *path++ = '\0';
- return path-buf;
-}
-
-static int smb_encode_path(struct smb_sb_info *server, char *buf, int maxlen,
- struct dentry *dir, struct qstr *name)
-{
- int result;
-
- result = smb_build_path(server, buf, maxlen, dir, name);
- if (result < 0)
- goto out;
- if (server->opt.protocol <= SMB_PROTOCOL_COREPLUS)
- str_upper(buf, result);
-out:
- return result;
-}
-
-/* encode_path for non-trans2 request SMBs */
-static int smb_simple_encode_path(struct smb_request *req, char **p,
- struct dentry * entry, struct qstr * name)
-{
- struct smb_sb_info *server = req->rq_server;
- char *s = *p;
- int res;
- int maxlen = ((char *)req->rq_buffer + req->rq_bufsize) - s;
- int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);
-
- if (!maxlen)
- return -ENAMETOOLONG;
- *s++ = 4; /* ASCII data format */
-
- /*
- * SMB Unicode strings must be 16bit aligned relative the start of the
- * packet. If they are not they must be padded with 0.
- */
- if (unicode) {
- int align = s - (char *)req->rq_buffer;
- if (!(align & 1)) {
- *s++ = '\0';
- maxlen--;
- }
- }
-
- res = smb_encode_path(server, s, maxlen-1, entry, name);
- if (res < 0)
- return res;
- *p = s + res;
- return 0;
-}
-
-/* The following are taken directly from msdos-fs */
-
-/* Linear day numbers of the respective 1sts in non-leap years. */
-
-static int day_n[] =
-{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0};
- /* JanFebMarApr May Jun Jul Aug Sep Oct Nov Dec */
-
-
-static time_t
-utc2local(struct smb_sb_info *server, time_t time)
-{
- return time - server->opt.serverzone*60;
-}
-
-static time_t
-local2utc(struct smb_sb_info *server, time_t time)
-{
- return time + server->opt.serverzone*60;
-}
-
-/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-
-static time_t
-date_dos2unix(struct smb_sb_info *server, __u16 date, __u16 time)
-{
- int month, year;
- time_t secs;
-
- /* first subtract and mask after that... Otherwise, if
- date == 0, bad things happen */
- month = ((date >> 5) - 1) & 15;
- year = date >> 9;
- secs = (time & 31) * 2 + 60 * ((time >> 5) & 63) + (time >> 11) * 3600 + 86400 *
- ((date & 31) - 1 + day_n[month] + (year / 4) + year * 365 - ((year & 3) == 0 &&
- month < 2 ? 1 : 0) + 3653);
- /* days since 1.1.70 plus 80's leap day */
- return local2utc(server, secs);
-}
-
-
-/* Convert linear UNIX date to a MS-DOS time/date pair. */
-
-static void
-date_unix2dos(struct smb_sb_info *server,
- int unix_date, __u16 *date, __u16 *time)
-{
- int day, year, nl_day, month;
-
- unix_date = utc2local(server, unix_date);
- if (unix_date < 315532800)
- unix_date = 315532800;
-
- *time = (unix_date % 60) / 2 +
- (((unix_date / 60) % 60) << 5) +
- (((unix_date / 3600) % 24) << 11);
-
- day = unix_date / 86400 - 3652;
- year = day / 365;
- if ((year + 3) / 4 + 365 * year > day)
- year--;
- day -= (year + 3) / 4 + 365 * year;
- if (day == 59 && !(year & 3)) {
- nl_day = day;
- month = 2;
- } else {
- nl_day = (year & 3) || day <= 59 ? day : day - 1;
- for (month = 1; month < 12; month++)
- if (day_n[month] > nl_day)
- break;
- }
- *date = nl_day - day_n[month - 1] + 1 + (month << 5) + (year << 9);
-}
-
-/* The following are taken from fs/ntfs/util.c */
-
-#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
-
-/*
- * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
- * into Unix UTC (based 1970-01-01, in seconds).
- */
-static struct timespec
-smb_ntutc2unixutc(u64 ntutc)
-{
- struct timespec ts;
- /* FIXME: what about the timezone difference? */
- /* Subtract the NTFS time offset, then convert to 1s intervals. */
- u64 t = ntutc - NTFS_TIME_OFFSET;
- ts.tv_nsec = do_div(t, 10000000) * 100;
- ts.tv_sec = t;
- return ts;
-}
-
-/* Convert the Unix UTC into NT time */
-static u64
-smb_unixutc2ntutc(struct timespec ts)
-{
- /* Note: timezone conversion is probably wrong. */
- /* return ((u64)utc2local(server, t)) * 10000000 + NTFS_TIME_OFFSET; */
- return ((u64)ts.tv_sec) * 10000000 + ts.tv_nsec/100 + NTFS_TIME_OFFSET;
-}
-
-#define MAX_FILE_MODE 6
-static mode_t file_mode[] = {
- S_IFREG, S_IFDIR, S_IFLNK, S_IFCHR, S_IFBLK, S_IFIFO, S_IFSOCK
-};
-
-static int smb_filetype_to_mode(u32 filetype)
-{
- if (filetype > MAX_FILE_MODE) {
- PARANOIA("Filetype out of range: %d\n", filetype);
- return S_IFREG;
- }
- return file_mode[filetype];
-}
-
-static u32 smb_filetype_from_mode(int mode)
-{
- if (S_ISREG(mode))
- return UNIX_TYPE_FILE;
- if (S_ISDIR(mode))
- return UNIX_TYPE_DIR;
- if (S_ISLNK(mode))
- return UNIX_TYPE_SYMLINK;
- if (S_ISCHR(mode))
- return UNIX_TYPE_CHARDEV;
- if (S_ISBLK(mode))
- return UNIX_TYPE_BLKDEV;
- if (S_ISFIFO(mode))
- return UNIX_TYPE_FIFO;
- if (S_ISSOCK(mode))
- return UNIX_TYPE_SOCKET;
- return UNIX_TYPE_UNKNOWN;
-}
-
-
-/*****************************************************************************/
-/* */
-/* Support section. */
-/* */
-/*****************************************************************************/
-
-__u32
-smb_len(__u8 * p)
-{
- return ((*(p+1) & 0x1) << 16L) | (*(p+2) << 8L) | *(p+3);
-}
-
-static __u16
-smb_bcc(__u8 * packet)
-{
- int pos = SMB_HEADER_LEN + SMB_WCT(packet) * sizeof(__u16);
- return WVAL(packet, pos);
-}
-
-/* smb_valid_packet: We check if packet fulfills the basic
- requirements of a smb packet */
-
-static int
-smb_valid_packet(__u8 * packet)
-{
- return (packet[4] == 0xff
- && packet[5] == 'S'
- && packet[6] == 'M'
- && packet[7] == 'B'
- && (smb_len(packet) + 4 == SMB_HEADER_LEN
- + SMB_WCT(packet) * 2 + smb_bcc(packet)));
-}
-
-/* smb_verify: We check if we got the answer we expected, and if we
- got enough data. If bcc == -1, we don't care. */
-
-static int
-smb_verify(__u8 * packet, int command, int wct, int bcc)
-{
- if (SMB_CMD(packet) != command)
- goto bad_command;
- if (SMB_WCT(packet) < wct)
- goto bad_wct;
- if (bcc != -1 && smb_bcc(packet) < bcc)
- goto bad_bcc;
- return 0;
-
-bad_command:
- printk(KERN_ERR "smb_verify: command=%x, SMB_CMD=%x??\n",
- command, SMB_CMD(packet));
- goto fail;
-bad_wct:
- printk(KERN_ERR "smb_verify: command=%x, wct=%d, SMB_WCT=%d??\n",
- command, wct, SMB_WCT(packet));
- goto fail;
-bad_bcc:
- printk(KERN_ERR "smb_verify: command=%x, bcc=%d, SMB_BCC=%d??\n",
- command, bcc, smb_bcc(packet));
-fail:
- return -EIO;
-}
-
-/*
- * Returns the maximum read or write size for the "payload". Making all of the
- * packet fit within the negotiated max_xmit size.
- *
- * N.B. Since this value is usually computed before locking the server,
- * the server's packet size must never be decreased!
- */
-static inline int
-smb_get_xmitsize(struct smb_sb_info *server, int overhead)
-{
- return server->opt.max_xmit - overhead;
-}
-
-/*
- * Calculate the maximum read size
- */
-int
-smb_get_rsize(struct smb_sb_info *server)
-{
- /* readX has 12 parameters, read has 5 */
- int overhead = SMB_HEADER_LEN + 12 * sizeof(__u16) + 2 + 1 + 2;
- int size = smb_get_xmitsize(server, overhead);
-
- VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
-
- return size;
-}
-
-/*
- * Calculate the maximum write size
- */
-int
-smb_get_wsize(struct smb_sb_info *server)
-{
- /* writeX has 14 parameters, write has 5 */
- int overhead = SMB_HEADER_LEN + 14 * sizeof(__u16) + 2 + 1 + 2;
- int size = smb_get_xmitsize(server, overhead);
-
- VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
-
- return size;
-}
-
-/*
- * Convert SMB error codes to -E... errno values.
- */
-int
-smb_errno(struct smb_request *req)
-{
- int errcls = req->rq_rcls;
- int error = req->rq_err;
- char *class = "Unknown";
-
- VERBOSE("errcls %d code %d from command 0x%x\n",
- errcls, error, SMB_CMD(req->rq_header));
-
- if (errcls == ERRDOS) {
- switch (error) {
- case ERRbadfunc:
- return -EINVAL;
- case ERRbadfile:
- case ERRbadpath:
- return -ENOENT;
- case ERRnofids:
- return -EMFILE;
- case ERRnoaccess:
- return -EACCES;
- case ERRbadfid:
- return -EBADF;
- case ERRbadmcb:
- return -EREMOTEIO;
- case ERRnomem:
- return -ENOMEM;
- case ERRbadmem:
- return -EFAULT;
- case ERRbadenv:
- case ERRbadformat:
- return -EREMOTEIO;
- case ERRbadaccess:
- return -EACCES;
- case ERRbaddata:
- return -E2BIG;
- case ERRbaddrive:
- return -ENXIO;
- case ERRremcd:
- return -EREMOTEIO;
- case ERRdiffdevice:
- return -EXDEV;
- case ERRnofiles:
- return -ENOENT;
- case ERRbadshare:
- return -ETXTBSY;
- case ERRlock:
- return -EDEADLK;
- case ERRfilexists:
- return -EEXIST;
- case ERROR_INVALID_PARAMETER:
- return -EINVAL;
- case ERROR_DISK_FULL:
- return -ENOSPC;
- case ERROR_INVALID_NAME:
- return -ENOENT;
- case ERROR_DIR_NOT_EMPTY:
- return -ENOTEMPTY;
- case ERROR_NOT_LOCKED:
- return -ENOLCK;
- case ERROR_ALREADY_EXISTS:
- return -EEXIST;
- default:
- class = "ERRDOS";
- goto err_unknown;
- }
- } else if (errcls == ERRSRV) {
- switch (error) {
- /* N.B. This is wrong ... EIO ? */
- case ERRerror:
- return -ENFILE;
- case ERRbadpw:
- return -EINVAL;
- case ERRbadtype:
- case ERRtimeout:
- return -EIO;
- case ERRaccess:
- return -EACCES;
- /*
- * This is a fatal error, as it means the "tree ID"
- * for this connection is no longer valid. We map
- * to a special error code and get a new connection.
- */
- case ERRinvnid:
- return -EBADSLT;
- default:
- class = "ERRSRV";
- goto err_unknown;
- }
- } else if (errcls == ERRHRD) {
- switch (error) {
- case ERRnowrite:
- return -EROFS;
- case ERRbadunit:
- return -ENODEV;
- case ERRnotready:
- return -EUCLEAN;
- case ERRbadcmd:
- case ERRdata:
- return -EIO;
- case ERRbadreq:
- return -ERANGE;
- case ERRbadshare:
- return -ETXTBSY;
- case ERRlock:
- return -EDEADLK;
- case ERRdiskfull:
- return -ENOSPC;
- default:
- class = "ERRHRD";
- goto err_unknown;
- }
- } else if (errcls == ERRCMD) {
- class = "ERRCMD";
- } else if (errcls == SUCCESS) {
- return 0; /* This is the only valid 0 return */
- }
-
-err_unknown:
- printk(KERN_ERR "smb_errno: class %s, code %d from command 0x%x\n",
- class, error, SMB_CMD(req->rq_header));
- return -EIO;
-}
-
-/* smb_request_ok: We expect the server to be locked. Then we do the
- request and check the answer completely. When smb_request_ok
- returns 0, you can be quite sure that everything went well. When
- the answer is <=0, the returned number is a valid unix errno. */
-
-static int
-smb_request_ok(struct smb_request *req, int command, int wct, int bcc)
-{
- int result;
-
- req->rq_resp_wct = wct;
- req->rq_resp_bcc = bcc;
-
- result = smb_add_request(req);
- if (result != 0) {
- DEBUG1("smb_request failed\n");
- goto out;
- }
-
- if (smb_valid_packet(req->rq_header) != 0) {
- PARANOIA("invalid packet!\n");
- goto out;
- }
-
- result = smb_verify(req->rq_header, command, wct, bcc);
-
-out:
- return result;
-}
-
-/*
- * This implements the NEWCONN ioctl. It installs the server pid,
- * sets server->state to CONN_VALID, and wakes up the waiting process.
- */
-int
-smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
-{
- struct file *filp;
- struct sock *sk;
- int error;
-
- VERBOSE("fd=%d, pid=%d\n", opt->fd, current->pid);
-
- smb_lock_server(server);
-
- /*
- * Make sure we don't already have a valid connection ...
- */
- error = -EINVAL;
- if (server->state == CONN_VALID)
- goto out;
-
- error = -EACCES;
- if (current_uid() != server->mnt->mounted_uid &&
- !capable(CAP_SYS_ADMIN))
- goto out;
-
- error = -EBADF;
- filp = fget(opt->fd);
- if (!filp)
- goto out;
- if (!smb_valid_socket(filp->f_path.dentry->d_inode))
- goto out_putf;
-
- server->sock_file = filp;
- server->conn_pid = get_pid(task_pid(current));
- server->opt = *opt;
- server->generation += 1;
- server->state = CONN_VALID;
- error = 0;
-
- if (server->conn_error) {
- /*
- * conn_error is the returncode we originally decided to
- * drop the old connection on. This message should be positive
- * and not make people ask questions on why smbfs is printing
- * error messages ...
- */
- printk(KERN_INFO "SMB connection re-established (%d)\n",
- server->conn_error);
- server->conn_error = 0;
- }
-
- /*
- * Store the server in sock user_data (Only used by sunrpc)
- */
- sk = SOCKET_I(filp->f_path.dentry->d_inode)->sk;
- sk->sk_user_data = server;
-
- /* chain into the data_ready callback */
- server->data_ready = xchg(&sk->sk_data_ready, smb_data_ready);
-
- /* check if we have an old smbmount that uses seconds for the
- serverzone */
- if (server->opt.serverzone > 12*60 || server->opt.serverzone < -12*60)
- server->opt.serverzone /= 60;
-
- /* now that we have an established connection we can detect the server
- type and enable bug workarounds */
- if (server->opt.protocol < SMB_PROTOCOL_LANMAN2)
- install_ops(server->ops, &smb_ops_core);
- else if (server->opt.protocol == SMB_PROTOCOL_LANMAN2)
- install_ops(server->ops, &smb_ops_os2);
- else if (server->opt.protocol == SMB_PROTOCOL_NT1 &&
- (server->opt.max_xmit < 0x1000) &&
- !(server->opt.capabilities & SMB_CAP_NT_SMBS)) {
- /* FIXME: can we kill the WIN95 flag now? */
- server->mnt->flags |= SMB_MOUNT_WIN95;
- VERBOSE("detected WIN95 server\n");
- install_ops(server->ops, &smb_ops_win95);
- } else {
- /*
- * Samba has max_xmit 65535
- * NT4spX has max_xmit 4536 (or something like that)
- * win2k has ...
- */
- VERBOSE("detected NT1 (Samba, NT4/5) server\n");
- install_ops(server->ops, &smb_ops_winNT);
- }
-
- /* FIXME: the win9x code wants to modify these ... (seek/trunc bug) */
- if (server->mnt->flags & SMB_MOUNT_OLDATTR) {
- server->ops->getattr = smb_proc_getattr_core;
- } else if (server->mnt->flags & SMB_MOUNT_DIRATTR) {
- server->ops->getattr = smb_proc_getattr_ff;
- }
-
- /* Decode server capabilities */
- if (server->opt.capabilities & SMB_CAP_LARGE_FILES) {
- /* Should be ok to set this now, as no one can access the
- mount until the connection has been established. */
- SB_of(server)->s_maxbytes = ~0ULL >> 1;
- VERBOSE("LFS enabled\n");
- }
- if (server->opt.capabilities & SMB_CAP_UNICODE) {
- server->mnt->flags |= SMB_MOUNT_UNICODE;
- VERBOSE("Unicode enabled\n");
- } else {
- server->mnt->flags &= ~SMB_MOUNT_UNICODE;
- }
-#if 0
- /* flags we may test for other patches ... */
- if (server->opt.capabilities & SMB_CAP_LARGE_READX) {
- VERBOSE("Large reads enabled\n");
- }
- if (server->opt.capabilities & SMB_CAP_LARGE_WRITEX) {
- VERBOSE("Large writes enabled\n");
- }
-#endif
- if (server->opt.capabilities & SMB_CAP_UNIX) {
- struct inode *inode;
- VERBOSE("Using UNIX CIFS extensions\n");
- install_ops(server->ops, &smb_ops_unix);
- inode = SB_of(server)->s_root->d_inode;
- if (inode)
- inode->i_op = &smb_dir_inode_operations_unix;
- }
-
- VERBOSE("protocol=%d, max_xmit=%d, pid=%d capabilities=0x%x\n",
- server->opt.protocol, server->opt.max_xmit,
- pid_nr(server->conn_pid), server->opt.capabilities);
-
- /* FIXME: this really should be done by smbmount. */
- if (server->opt.max_xmit > SMB_MAX_PACKET_SIZE) {
- server->opt.max_xmit = SMB_MAX_PACKET_SIZE;
- }
-
- smb_unlock_server(server);
- smbiod_wake_up();
- if (server->opt.capabilities & SMB_CAP_UNIX)
- smb_proc_query_cifsunix(server);
-
- server->conn_complete++;
- wake_up_interruptible_all(&server->conn_wq);
- return error;
-
-out:
- smb_unlock_server(server);
- smbiod_wake_up();
- return error;
-
-out_putf:
- fput(filp);
- goto out;
-}
-
-/* smb_setup_header: We completely set up the packet. You only have to
- insert the command-specific fields */
-
-__u8 *
-smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc)
-{
- __u32 xmit_len = SMB_HEADER_LEN + wct * sizeof(__u16) + bcc + 2;
- __u8 *p = req->rq_header;
- struct smb_sb_info *server = req->rq_server;
-
- p = smb_encode_smb_length(p, xmit_len - 4);
-
- *p++ = 0xff;
- *p++ = 'S';
- *p++ = 'M';
- *p++ = 'B';
- *p++ = command;
-
- memset(p, '\0', 19);
- p += 19;
- p += 8;
-
- if (server->opt.protocol > SMB_PROTOCOL_CORE) {
- int flags = SMB_FLAGS_CASELESS_PATHNAMES;
- int flags2 = SMB_FLAGS2_LONG_PATH_COMPONENTS |
- SMB_FLAGS2_EXTENDED_ATTRIBUTES; /* EA? not really ... */
-
- *(req->rq_header + smb_flg) = flags;
- if (server->mnt->flags & SMB_MOUNT_UNICODE)
- flags2 |= SMB_FLAGS2_UNICODE_STRINGS;
- WSET(req->rq_header, smb_flg2, flags2);
- }
- *p++ = wct; /* wct */
- p += 2 * wct;
- WSET(p, 0, bcc);
-
- /* Include the header in the data to send */
- req->rq_iovlen = 1;
- req->rq_iov[0].iov_base = req->rq_header;
- req->rq_iov[0].iov_len = xmit_len - bcc;
-
- return req->rq_buffer;
-}
-
-static void
-smb_setup_bcc(struct smb_request *req, __u8 *p)
-{
- u16 bcc = p - req->rq_buffer;
- u8 *pbcc = req->rq_header + SMB_HEADER_LEN + 2*SMB_WCT(req->rq_header);
-
- WSET(pbcc, 0, bcc);
-
- smb_encode_smb_length(req->rq_header, SMB_HEADER_LEN +
- 2*SMB_WCT(req->rq_header) - 2 + bcc);
-
- /* Include the "bytes" in the data to send */
- req->rq_iovlen = 2;
- req->rq_iov[1].iov_base = req->rq_buffer;
- req->rq_iov[1].iov_len = bcc;
-}
-
-static int
-smb_proc_seek(struct smb_sb_info *server, __u16 fileid,
- __u16 mode, off_t offset)
-{
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBlseek, 4, 0);
- WSET(req->rq_header, smb_vwv0, fileid);
- WSET(req->rq_header, smb_vwv1, mode);
- DSET(req->rq_header, smb_vwv2, offset);
- req->rq_flags |= SMB_REQ_NORETRY;
-
- result = smb_request_ok(req, SMBlseek, 2, 0);
- if (result < 0) {
- result = 0;
- goto out_free;
- }
-
- result = DVAL(req->rq_header, smb_vwv0);
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_open(struct smb_sb_info *server, struct dentry *dentry, int wish)
-{
- struct inode *ino = dentry->d_inode;
- struct smb_inode_info *ei = SMB_I(ino);
- int mode, read_write = 0x42, read_only = 0x40;
- int res;
- char *p;
- struct smb_request *req;
-
- /*
- * Attempt to open r/w, unless there are no write privileges.
- */
- mode = read_write;
- if (!(ino->i_mode & (S_IWUSR | S_IWGRP | S_IWOTH)))
- mode = read_only;
-#if 0
- /* FIXME: why is this code not in? below we fix it so that a caller
- wanting RO doesn't get RW. smb_revalidate_inode does some
- optimization based on access mode. tail -f needs it to be correct.
-
- We must open rw since we don't do the open if called a second time
- with different 'wish'. Is that not supported by smb servers? */
- if (!(wish & (O_WRONLY | O_RDWR)))
- mode = read_only;
-#endif
-
- res = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- retry:
- p = smb_setup_header(req, SMBopen, 2, 0);
- WSET(req->rq_header, smb_vwv0, mode);
- WSET(req->rq_header, smb_vwv1, aSYSTEM | aHIDDEN | aDIR);
- res = smb_simple_encode_path(req, &p, dentry, NULL);
- if (res < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- res = smb_request_ok(req, SMBopen, 7, 0);
- if (res != 0) {
- if (mode == read_write &&
- (res == -EACCES || res == -ETXTBSY || res == -EROFS))
- {
- VERBOSE("%s/%s R/W failed, error=%d, retrying R/O\n",
- DENTRY_PATH(dentry), res);
- mode = read_only;
- req->rq_flags = 0;
- goto retry;
- }
- goto out_free;
- }
- /* We should now have data in vwv[0..6]. */
-
- ei->fileid = WVAL(req->rq_header, smb_vwv0);
- ei->attr = WVAL(req->rq_header, smb_vwv1);
- /* smb_vwv2 has mtime */
- /* smb_vwv4 has size */
- ei->access = (WVAL(req->rq_header, smb_vwv6) & SMB_ACCMASK);
- ei->open = server->generation;
-
-out_free:
- smb_rput(req);
-out:
- return res;
-}
-
-/*
- * Make sure the file is open, and check that the access
- * is compatible with the desired access.
- */
-int
-smb_open(struct dentry *dentry, int wish)
-{
- struct inode *inode = dentry->d_inode;
- int result;
- __u16 access;
-
- result = -ENOENT;
- if (!inode) {
- printk(KERN_ERR "smb_open: no inode for dentry %s/%s\n",
- DENTRY_PATH(dentry));
- goto out;
- }
-
- if (!smb_is_open(inode)) {
- struct smb_sb_info *server = server_from_inode(inode);
- result = 0;
- if (!smb_is_open(inode))
- result = smb_proc_open(server, dentry, wish);
- if (result)
- goto out;
- /*
- * A successful open means the path is still valid ...
- */
- smb_renew_times(dentry);
- }
-
- /*
- * Check whether the access is compatible with the desired mode.
- */
- result = 0;
- access = SMB_I(inode)->access;
- if (access != wish && access != SMB_O_RDWR) {
- PARANOIA("%s/%s access denied, access=%x, wish=%x\n",
- DENTRY_PATH(dentry), access, wish);
- result = -EACCES;
- }
-out:
- return result;
-}
-
-static int
-smb_proc_close(struct smb_sb_info *server, __u16 fileid, __u32 mtime)
-{
- struct smb_request *req;
- int result = -ENOMEM;
-
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBclose, 3, 0);
- WSET(req->rq_header, smb_vwv0, fileid);
- DSET(req->rq_header, smb_vwv1, utc2local(server, mtime));
- req->rq_flags |= SMB_REQ_NORETRY;
- result = smb_request_ok(req, SMBclose, 0, 0);
-
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Win NT 4.0 has an apparent bug in that it fails to update the
- * modify time when writing to a file. As a workaround, we update
- * both modify and access time locally, and post the times to the
- * server when closing the file.
- */
-static int
-smb_proc_close_inode(struct smb_sb_info *server, struct inode * ino)
-{
- struct smb_inode_info *ei = SMB_I(ino);
- int result = 0;
- if (smb_is_open(ino))
- {
- /*
- * We clear the open flag in advance, in case another
- * process observes the value while we block below.
- */
- ei->open = 0;
-
- /*
- * Kludge alert: SMB timestamps are accurate only to
- * two seconds ... round the times to avoid needless
- * cache invalidations!
- */
- if (ino->i_mtime.tv_sec & 1) {
- ino->i_mtime.tv_sec--;
- ino->i_mtime.tv_nsec = 0;
- }
- if (ino->i_atime.tv_sec & 1) {
- ino->i_atime.tv_sec--;
- ino->i_atime.tv_nsec = 0;
- }
- /*
- * If the file is open with write permissions,
- * update the time stamps to sync mtime and atime.
- */
- if ((server->opt.capabilities & SMB_CAP_UNIX) == 0 &&
- (server->opt.protocol >= SMB_PROTOCOL_LANMAN2) &&
- !(ei->access == SMB_O_RDONLY))
- {
- struct smb_fattr fattr;
- smb_get_inode_attr(ino, &fattr);
- smb_proc_setattr_ext(server, ino, &fattr);
- }
-
- result = smb_proc_close(server, ei->fileid, ino->i_mtime.tv_sec);
- /*
- * Force a revalidation after closing ... some servers
- * don't post the size until the file has been closed.
- */
- if (server->opt.protocol < SMB_PROTOCOL_NT1)
- ei->oldmtime = 0;
- ei->closed = jiffies;
- }
- return result;
-}
-
-int
-smb_close(struct inode *ino)
-{
- int result = 0;
-
- if (smb_is_open(ino)) {
- struct smb_sb_info *server = server_from_inode(ino);
- result = smb_proc_close_inode(server, ino);
- }
- return result;
-}
-
-/*
- * This is used to close a file following a failed instantiate.
- * Since we don't have an inode, we can't use any of the above.
- */
-int
-smb_close_fileid(struct dentry *dentry, __u16 fileid)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- int result;
-
- result = smb_proc_close(server, fileid, get_seconds());
- return result;
-}
-
-/* In smb_proc_read and smb_proc_write we do not retry, because the
- file-id would not be valid after a reconnection. */
-
-static void
-smb_proc_read_data(struct smb_request *req)
-{
- req->rq_iov[0].iov_base = req->rq_buffer;
- req->rq_iov[0].iov_len = 3;
-
- req->rq_iov[1].iov_base = req->rq_page;
- req->rq_iov[1].iov_len = req->rq_rsize;
- req->rq_iovlen = 2;
-
- req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-}
-
-static int
-smb_proc_read(struct inode *inode, loff_t offset, int count, char *data)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- __u16 returned_count, data_len;
- unsigned char *buf;
- int result;
- struct smb_request *req;
- u8 rbuf[4];
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBread, 5, 0);
- buf = req->rq_header;
- WSET(buf, smb_vwv0, SMB_I(inode)->fileid);
- WSET(buf, smb_vwv1, count);
- DSET(buf, smb_vwv2, offset);
- WSET(buf, smb_vwv4, 0);
-
- req->rq_page = data;
- req->rq_rsize = count;
- req->rq_callback = smb_proc_read_data;
- req->rq_buffer = rbuf;
- req->rq_flags |= SMB_REQ_NORETRY | SMB_REQ_STATIC;
-
- result = smb_request_ok(req, SMBread, 5, -1);
- if (result < 0)
- goto out_free;
- returned_count = WVAL(req->rq_header, smb_vwv0);
-
- data_len = WVAL(rbuf, 1);
-
- if (returned_count != data_len) {
- printk(KERN_NOTICE "smb_proc_read: returned != data_len\n");
- printk(KERN_NOTICE "smb_proc_read: ret_c=%d, data_len=%d\n",
- returned_count, data_len);
- }
- result = data_len;
-
-out_free:
- smb_rput(req);
-out:
- VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
- inode->i_ino, SMB_I(inode)->fileid, count, result);
- return result;
-}
-
-static int
-smb_proc_write(struct inode *inode, loff_t offset, int count, const char *data)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- int result;
- u16 fileid = SMB_I(inode)->fileid;
- u8 buf[4];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
- inode->i_ino, fileid, count, offset);
-
- smb_setup_header(req, SMBwrite, 5, count + 3);
- WSET(req->rq_header, smb_vwv0, fileid);
- WSET(req->rq_header, smb_vwv1, count);
- DSET(req->rq_header, smb_vwv2, offset);
- WSET(req->rq_header, smb_vwv4, 0);
-
- buf[0] = 1;
- WSET(buf, 1, count); /* yes, again ... */
- req->rq_iov[1].iov_base = buf;
- req->rq_iov[1].iov_len = 3;
- req->rq_iov[2].iov_base = (char *) data;
- req->rq_iov[2].iov_len = count;
- req->rq_iovlen = 3;
- req->rq_flags |= SMB_REQ_NORETRY;
-
- result = smb_request_ok(req, SMBwrite, 1, 0);
- if (result >= 0)
- result = WVAL(req->rq_header, smb_vwv0);
-
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * In smb_proc_readX and smb_proc_writeX we do not retry, because the
- * file-id would not be valid after a reconnection.
- */
-
-#define SMB_READX_MAX_PAD 64
-static void
-smb_proc_readX_data(struct smb_request *req)
-{
- /* header length, excluding the netbios length (-4) */
- int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
- int data_off = WVAL(req->rq_header, smb_vwv6);
-
- /*
- * Some genius made the padding to the data bytes arbitrary.
- * So we must first calculate the amount of padding used by the server.
- */
- data_off -= hdrlen;
- if (data_off > SMB_READX_MAX_PAD || data_off < 0) {
- PARANOIA("offset is larger than SMB_READX_MAX_PAD or negative!\n");
- PARANOIA("%d > %d || %d < 0\n", data_off, SMB_READX_MAX_PAD, data_off);
- req->rq_rlen = req->rq_bufsize + 1;
- return;
- }
- req->rq_iov[0].iov_base = req->rq_buffer;
- req->rq_iov[0].iov_len = data_off;
-
- req->rq_iov[1].iov_base = req->rq_page;
- req->rq_iov[1].iov_len = req->rq_rsize;
- req->rq_iovlen = 2;
-
- req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-}
-
-static int
-smb_proc_readX(struct inode *inode, loff_t offset, int count, char *data)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- unsigned char *buf;
- int result;
- struct smb_request *req;
- static char pad[SMB_READX_MAX_PAD];
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBreadX, 12, 0);
- buf = req->rq_header;
- WSET(buf, smb_vwv0, 0x00ff);
- WSET(buf, smb_vwv1, 0);
- WSET(buf, smb_vwv2, SMB_I(inode)->fileid);
- DSET(buf, smb_vwv3, (u32)offset); /* low 32 bits */
- WSET(buf, smb_vwv5, count);
- WSET(buf, smb_vwv6, 0);
- DSET(buf, smb_vwv7, 0);
- WSET(buf, smb_vwv9, 0);
- DSET(buf, smb_vwv10, (u32)(offset >> 32)); /* high 32 bits */
- WSET(buf, smb_vwv11, 0);
-
- req->rq_page = data;
- req->rq_rsize = count;
- req->rq_callback = smb_proc_readX_data;
- req->rq_buffer = pad;
- req->rq_bufsize = SMB_READX_MAX_PAD;
- req->rq_flags |= SMB_REQ_STATIC | SMB_REQ_NORETRY;
-
- result = smb_request_ok(req, SMBreadX, 12, -1);
- if (result < 0)
- goto out_free;
- result = WVAL(req->rq_header, smb_vwv5);
-
-out_free:
- smb_rput(req);
-out:
- VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
- inode->i_ino, SMB_I(inode)->fileid, count, result);
- return result;
-}
-
-static int
-smb_proc_writeX(struct inode *inode, loff_t offset, int count, const char *data)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- int result;
- u8 *p;
- static u8 pad[4];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
- inode->i_ino, SMB_I(inode)->fileid, count, offset);
-
- p = smb_setup_header(req, SMBwriteX, 14, count + 1);
- WSET(req->rq_header, smb_vwv0, 0x00ff);
- WSET(req->rq_header, smb_vwv1, 0);
- WSET(req->rq_header, smb_vwv2, SMB_I(inode)->fileid);
- DSET(req->rq_header, smb_vwv3, (u32)offset); /* low 32 bits */
- DSET(req->rq_header, smb_vwv5, 0);
- WSET(req->rq_header, smb_vwv7, 0); /* write mode */
- WSET(req->rq_header, smb_vwv8, 0);
- WSET(req->rq_header, smb_vwv9, 0);
- WSET(req->rq_header, smb_vwv10, count); /* data length */
- WSET(req->rq_header, smb_vwv11, smb_vwv12 + 2 + 1);
- DSET(req->rq_header, smb_vwv12, (u32)(offset >> 32));
-
- req->rq_iov[1].iov_base = pad;
- req->rq_iov[1].iov_len = 1;
- req->rq_iov[2].iov_base = (char *) data;
- req->rq_iov[2].iov_len = count;
- req->rq_iovlen = 3;
- req->rq_flags |= SMB_REQ_NORETRY;
-
- result = smb_request_ok(req, SMBwriteX, 6, 0);
- if (result >= 0)
- result = WVAL(req->rq_header, smb_vwv2);
-
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, SMBcreate, 3, 0);
- WSET(req->rq_header, smb_vwv0, attr);
- DSET(req->rq_header, smb_vwv1, utc2local(server, ctime));
- result = smb_simple_encode_path(req, &p, dentry, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- result = smb_request_ok(req, SMBcreate, 1, 0);
- if (result < 0)
- goto out_free;
-
- *fileid = WVAL(req->rq_header, smb_vwv0);
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry)
-{
- struct smb_sb_info *server = server_from_dentry(old_dentry);
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, SMBmv, 1, 0);
- WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN | aDIR);
- result = smb_simple_encode_path(req, &p, old_dentry, NULL);
- if (result < 0)
- goto out_free;
- result = smb_simple_encode_path(req, &p, new_dentry, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- if ((result = smb_request_ok(req, SMBmv, 0, 0)) < 0)
- goto out_free;
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Code common to mkdir and rmdir.
- */
-static int
-smb_proc_generic_command(struct dentry *dentry, __u8 command)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, command, 0, 0);
- result = smb_simple_encode_path(req, &p, dentry, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- result = smb_request_ok(req, command, 0, 0);
- if (result < 0)
- goto out_free;
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_mkdir(struct dentry *dentry)
-{
- return smb_proc_generic_command(dentry, SMBmkdir);
-}
-
-int
-smb_proc_rmdir(struct dentry *dentry)
-{
- return smb_proc_generic_command(dentry, SMBrmdir);
-}
-
-#if SMBFS_POSIX_UNLINK
-/*
- * Removes readonly attribute from a file. Used by unlink to give posix
- * semantics.
- */
-static int
-smb_set_rw(struct dentry *dentry,struct smb_sb_info *server)
-{
- int result;
- struct smb_fattr fattr;
-
- /* FIXME: cifsUE should allow removing a readonly file. */
-
- /* first get current attribute */
- smb_init_dirent(server, &fattr);
- result = server->ops->getattr(server, dentry, &fattr);
- smb_finish_dirent(server, &fattr);
- if (result < 0)
- return result;
-
- /* if RONLY attribute is set, remove it */
- if (fattr.attr & aRONLY) { /* read only attribute is set */
- fattr.attr &= ~aRONLY;
- result = smb_proc_setattr_core(server, dentry, fattr.attr);
- }
- return result;
-}
-#endif
-
-int
-smb_proc_unlink(struct dentry *dentry)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- int flag = 0;
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- retry:
- p = smb_setup_header(req, SMBunlink, 1, 0);
- WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN);
- result = smb_simple_encode_path(req, &p, dentry, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- if ((result = smb_request_ok(req, SMBunlink, 0, 0)) < 0) {
-#if SMBFS_POSIX_UNLINK
- if (result == -EACCES && !flag) {
- /* Posix semantics is for the read-only state
- of a file to be ignored in unlink(). In the
- SMB world a unlink() is refused on a
- read-only file. To make things easier for
- unix users we try to override the files
- permission if the unlink fails with the
- right error.
- This introduces a race condition that could
- lead to a file being written by someone who
- shouldn't have access, but as far as I can
- tell that is unavoidable */
-
- /* remove RONLY attribute and try again */
- result = smb_set_rw(dentry,server);
- if (result == 0) {
- flag = 1;
- req->rq_flags = 0;
- goto retry;
- }
- }
-#endif
- goto out_free;
- }
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_flush(struct smb_sb_info *server, __u16 fileid)
-{
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBflush, 1, 0);
- WSET(req->rq_header, smb_vwv0, fileid);
- req->rq_flags |= SMB_REQ_NORETRY;
- result = smb_request_ok(req, SMBflush, 0, 0);
-
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_trunc32(struct inode *inode, loff_t length)
-{
- /*
- * Writing 0bytes is old-SMB magic for truncating files.
- * MAX_NON_LFS should prevent this from being called with a too
- * large offset.
- */
- return smb_proc_write(inode, length, 0, NULL);
-}
-
-static int
-smb_proc_trunc64(struct inode *inode, loff_t length)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- int result;
- char *param;
- char *data;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 14)))
- goto out;
-
- param = req->rq_buffer;
- data = req->rq_buffer + 6;
-
- /* FIXME: must we also set allocation size? winNT seems to do that */
- WSET(param, 0, SMB_I(inode)->fileid);
- WSET(param, 2, SMB_SET_FILE_END_OF_FILE_INFO);
- WSET(param, 4, 0);
- LSET(data, 0, length);
-
- req->rq_trans2_command = TRANSACT2_SETFILEINFO;
- req->rq_ldata = 8;
- req->rq_data = data;
- req->rq_lparm = 6;
- req->rq_parm = param;
- req->rq_flags |= SMB_REQ_NORETRY;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
-
- result = 0;
- if (req->rq_rcls != 0)
- result = smb_errno(req);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_trunc95(struct inode *inode, loff_t length)
-{
- struct smb_sb_info *server = server_from_inode(inode);
- int result = smb_proc_trunc32(inode, length);
-
- /*
- * win9x doesn't appear to update the size immediately.
- * It will return the old file size after the truncate,
- * confusing smbfs. So we force an update.
- *
- * FIXME: is this still necessary?
- */
- smb_proc_flush(server, SMB_I(inode)->fileid);
- return result;
-}
-
-static void
-smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
-{
- memset(fattr, 0, sizeof(*fattr));
-
- fattr->f_nlink = 1;
- fattr->f_uid = server->mnt->uid;
- fattr->f_gid = server->mnt->gid;
- fattr->f_unix = 0;
-}
-
-static void
-smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
-{
- if (fattr->f_unix)
- return;
-
- fattr->f_mode = server->mnt->file_mode;
- if (fattr->attr & aDIR) {
- fattr->f_mode = server->mnt->dir_mode;
- fattr->f_size = SMB_ST_BLKSIZE;
- }
- /* Check the read-only flag */
- if (fattr->attr & aRONLY)
- fattr->f_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
-
- /* How many 512 byte blocks do we need for this file? */
- fattr->f_blocks = 0;
- if (fattr->f_size != 0)
- fattr->f_blocks = 1 + ((fattr->f_size-1) >> 9);
- return;
-}
-
-void
-smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
- struct super_block *sb)
-{
- smb_init_dirent(server, fattr);
- fattr->attr = aDIR;
- fattr->f_ino = 2; /* traditional root inode number */
- fattr->f_mtime = current_fs_time(sb);
- smb_finish_dirent(server, fattr);
-}
-
-/*
- * Decode a dirent for old protocols
- *
- * qname is filled with the decoded, and possibly translated, name.
- * fattr receives decoded attributes
- *
- * Bugs Noted:
- * (1) Pathworks servers may pad the name with extra spaces.
- */
-static char *
-smb_decode_short_dirent(struct smb_sb_info *server, char *p,
- struct qstr *qname, struct smb_fattr *fattr,
- unsigned char *name_buf)
-{
- int len;
-
- /*
- * SMB doesn't have a concept of inode numbers ...
- */
- smb_init_dirent(server, fattr);
- fattr->f_ino = 0; /* FIXME: do we need this? */
-
- p += SMB_STATUS_SIZE; /* reserved (search_status) */
- fattr->attr = *p;
- fattr->f_mtime.tv_sec = date_dos2unix(server, WVAL(p, 3), WVAL(p, 1));
- fattr->f_mtime.tv_nsec = 0;
- fattr->f_size = DVAL(p, 5);
- fattr->f_ctime = fattr->f_mtime;
- fattr->f_atime = fattr->f_mtime;
- qname->name = p + 9;
- len = strnlen(qname->name, 12);
-
- /*
- * Trim trailing blanks for Pathworks servers
- */
- while (len > 2 && qname->name[len-1] == ' ')
- len--;
-
- smb_finish_dirent(server, fattr);
-
-#if 0
- /* FIXME: These only work for ascii chars, and recent smbmount doesn't
- allow the flag to be set anyway. It kills const. Remove? */
- switch (server->opt.case_handling) {
- case SMB_CASE_UPPER:
- str_upper(entry->name, len);
- break;
- case SMB_CASE_LOWER:
- str_lower(entry->name, len);
- break;
- default:
- break;
- }
-#endif
-
- qname->len = 0;
- len = server->ops->convert(name_buf, SMB_MAXNAMELEN,
- qname->name, len,
- server->remote_nls, server->local_nls);
- if (len > 0) {
- qname->len = len;
- qname->name = name_buf;
- DEBUG1("len=%d, name=%.*s\n",qname->len,qname->len,qname->name);
- }
-
- return p + 22;
-}
-
-/*
- * This routine is used to read in directory entries from the network.
- * Note that it is for short directory name seeks, i.e.: protocol <
- * SMB_PROTOCOL_LANMAN2
- */
-static int
-smb_proc_readdir_short(struct file *filp, void *dirent, filldir_t filldir,
- struct smb_cache_control *ctl)
-{
- struct dentry *dir = filp->f_path.dentry;
- struct smb_sb_info *server = server_from_dentry(dir);
- struct qstr qname;
- struct smb_fattr fattr;
- char *p;
- int result;
- int i, first, entries_seen, entries;
- int entries_asked = (server->opt.max_xmit - 100) / SMB_DIRINFO_SIZE;
- __u16 bcc;
- __u16 count;
- char status[SMB_STATUS_SIZE];
- static struct qstr mask = {
- .name = "*.*",
- .len = 3,
- };
- unsigned char *last_status;
- struct smb_request *req;
- unsigned char *name_buf;
-
- VERBOSE("%s/%s\n", DENTRY_PATH(dir));
-
- lock_kernel();
-
- result = -ENOMEM;
- if (! (name_buf = kmalloc(SMB_MAXNAMELEN, GFP_KERNEL)))
- goto out;
-
- first = 1;
- entries = 0;
- entries_seen = 2; /* implicit . and .. */
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
- goto out_name;
-
- while (1) {
- p = smb_setup_header(req, SMBsearch, 2, 0);
- WSET(req->rq_header, smb_vwv0, entries_asked);
- WSET(req->rq_header, smb_vwv1, aDIR);
- if (first == 1) {
- result = smb_simple_encode_path(req, &p, dir, &mask);
- if (result < 0)
- goto out_free;
- if (p + 3 > (char *)req->rq_buffer + req->rq_bufsize) {
- result = -ENAMETOOLONG;
- goto out_free;
- }
- *p++ = 5;
- WSET(p, 0, 0);
- p += 2;
- first = 0;
- } else {
- if (p + 5 + SMB_STATUS_SIZE >
- (char *)req->rq_buffer + req->rq_bufsize) {
- result = -ENAMETOOLONG;
- goto out_free;
- }
-
- *p++ = 4;
- *p++ = 0;
- *p++ = 5;
- WSET(p, 0, SMB_STATUS_SIZE);
- p += 2;
- memcpy(p, status, SMB_STATUS_SIZE);
- p += SMB_STATUS_SIZE;
- }
-
- smb_setup_bcc(req, p);
-
- result = smb_request_ok(req, SMBsearch, 1, -1);
- if (result < 0) {
- if ((req->rq_rcls == ERRDOS) &&
- (req->rq_err == ERRnofiles))
- break;
- goto out_free;
- }
- count = WVAL(req->rq_header, smb_vwv0);
- if (count <= 0)
- break;
-
- result = -EIO;
- bcc = smb_bcc(req->rq_header);
- if (bcc != count * SMB_DIRINFO_SIZE + 3)
- goto out_free;
- p = req->rq_buffer + 3;
-
-
- /* Make sure the response fits in the buffer. Fixed sized
- entries means we don't have to check in the decode loop. */
-
- last_status = req->rq_buffer + 3 + (count-1) * SMB_DIRINFO_SIZE;
-
- if (last_status + SMB_DIRINFO_SIZE >=
- req->rq_buffer + req->rq_bufsize) {
- printk(KERN_ERR "smb_proc_readdir_short: "
- "last dir entry outside buffer! "
- "%d@%p %d@%p\n", SMB_DIRINFO_SIZE, last_status,
- req->rq_bufsize, req->rq_buffer);
- goto out_free;
- }
-
- /* Read the last entry into the status field. */
- memcpy(status, last_status, SMB_STATUS_SIZE);
-
-
- /* Now we are ready to parse smb directory entries. */
-
- for (i = 0; i < count; i++) {
- p = smb_decode_short_dirent(server, p,
- &qname, &fattr, name_buf);
- if (qname.len == 0)
- continue;
-
- if (entries_seen == 2 && qname.name[0] == '.') {
- if (qname.len == 1)
- continue;
- if (qname.name[1] == '.' && qname.len == 2)
- continue;
- }
- if (!smb_fill_cache(filp, dirent, filldir, ctl,
- &qname, &fattr))
- ; /* stop reading? */
- entries_seen++;
- }
- }
- result = entries;
-
-out_free:
- smb_rput(req);
-out_name:
- kfree(name_buf);
-out:
- unlock_kernel();
- return result;
-}
-
-static void smb_decode_unix_basic(struct smb_fattr *fattr, struct smb_sb_info *server, char *p)
-{
- u64 size, disk_bytes;
-
- /* FIXME: verify nls support. all is sent as utf8? */
-
- fattr->f_unix = 1;
- fattr->f_mode = 0;
-
- /* FIXME: use the uniqueID from the remote instead? */
- /* 0 L file size in bytes */
- /* 8 L file size on disk in bytes (block count) */
- /* 40 L uid */
- /* 48 L gid */
- /* 56 W file type */
- /* 60 L devmajor */
- /* 68 L devminor */
- /* 76 L unique ID (inode) */
- /* 84 L permissions */
- /* 92 L link count */
-
- size = LVAL(p, 0);
- disk_bytes = LVAL(p, 8);
-
- /*
- * Some samba versions round up on-disk byte usage
- * to 1MB boundaries, making it useless. When seeing
- * that, use the size instead.
- */
- if (!(disk_bytes & 0xfffff))
- disk_bytes = size+511;
-
- fattr->f_size = size;
- fattr->f_blocks = disk_bytes >> 9;
- fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 16));
- fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 24));
- fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 32));
-
- if (server->mnt->flags & SMB_MOUNT_UID)
- fattr->f_uid = server->mnt->uid;
- else
- fattr->f_uid = LVAL(p, 40);
-
- if (server->mnt->flags & SMB_MOUNT_GID)
- fattr->f_gid = server->mnt->gid;
- else
- fattr->f_gid = LVAL(p, 48);
-
- fattr->f_mode |= smb_filetype_to_mode(WVAL(p, 56));
-
- if (S_ISBLK(fattr->f_mode) || S_ISCHR(fattr->f_mode)) {
- __u64 major = LVAL(p, 60);
- __u64 minor = LVAL(p, 68);
-
- fattr->f_rdev = MKDEV(major & 0xffffffff, minor & 0xffffffff);
- if (MAJOR(fattr->f_rdev) != (major & 0xffffffff) ||
- MINOR(fattr->f_rdev) != (minor & 0xffffffff))
- fattr->f_rdev = 0;
- }
-
- fattr->f_mode |= LVAL(p, 84);
-
- if ( (server->mnt->flags & SMB_MOUNT_DMODE) &&
- (S_ISDIR(fattr->f_mode)) )
- fattr->f_mode = (server->mnt->dir_mode & S_IRWXUGO) | S_IFDIR;
- else if ( (server->mnt->flags & SMB_MOUNT_FMODE) &&
- !(S_ISDIR(fattr->f_mode)) )
- fattr->f_mode = (server->mnt->file_mode & S_IRWXUGO) |
- (fattr->f_mode & S_IFMT);
-
-}
-
-/*
- * Interpret a long filename structure using the specified info level:
- * level 1 for anything below NT1 protocol
- * level 260 for NT1 protocol
- *
- * qname is filled with the decoded, and possibly translated, name
- * fattr receives decoded attributes.
- *
- * Bugs Noted:
- * (1) Win NT 4.0 appends a null byte to names and counts it in the length!
- */
-static char *
-smb_decode_long_dirent(struct smb_sb_info *server, char *p, int level,
- struct qstr *qname, struct smb_fattr *fattr,
- unsigned char *name_buf)
-{
- char *result;
- unsigned int len = 0;
- int n;
- __u16 date, time;
- int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);
-
- /*
- * SMB doesn't have a concept of inode numbers ...
- */
- smb_init_dirent(server, fattr);
- fattr->f_ino = 0; /* FIXME: do we need this? */
-
- switch (level) {
- case 1:
- len = *((unsigned char *) p + 22);
- qname->name = p + 23;
- result = p + 24 + len;
-
- date = WVAL(p, 0);
- time = WVAL(p, 2);
- fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_ctime.tv_nsec = 0;
-
- date = WVAL(p, 4);
- time = WVAL(p, 6);
- fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_atime.tv_nsec = 0;
-
- date = WVAL(p, 8);
- time = WVAL(p, 10);
- fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_mtime.tv_nsec = 0;
- fattr->f_size = DVAL(p, 12);
- /* ULONG allocation size */
- fattr->attr = WVAL(p, 20);
-
- VERBOSE("info 1 at %p, len=%d, name=%.*s\n",
- p, len, len, qname->name);
- break;
- case 260:
- result = p + WVAL(p, 0);
- len = DVAL(p, 60);
- if (len > 255) len = 255;
- /* NT4 null terminates, unless we are using unicode ... */
- qname->name = p + 94;
- if (!unicode && len && qname->name[len-1] == '\0')
- len--;
-
- fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 8));
- fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 16));
- fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 24));
- /* change time (32) */
- fattr->f_size = LVAL(p, 40);
- /* alloc size (48) */
- fattr->attr = DVAL(p, 56);
-
- VERBOSE("info 260 at %p, len=%d, name=%.*s\n",
- p, len, len, qname->name);
- break;
- case SMB_FIND_FILE_UNIX:
- result = p + WVAL(p, 0);
- qname->name = p + 108;
-
- len = strlen(qname->name);
- /* FIXME: should we check the length?? */
-
- p += 8;
- smb_decode_unix_basic(fattr, server, p);
- VERBOSE("info SMB_FIND_FILE_UNIX at %p, len=%d, name=%.*s\n",
- p, len, len, qname->name);
- break;
- default:
- PARANOIA("Unknown info level %d\n", level);
- result = p + WVAL(p, 0);
- goto out;
- }
-
- smb_finish_dirent(server, fattr);
-
-#if 0
- /* FIXME: These only work for ascii chars, and recent smbmount doesn't
- allow the flag to be set anyway. Remove? */
- switch (server->opt.case_handling) {
- case SMB_CASE_UPPER:
- str_upper(qname->name, len);
- break;
- case SMB_CASE_LOWER:
- str_lower(qname->name, len);
- break;
- default:
- break;
- }
-#endif
-
- qname->len = 0;
- n = server->ops->convert(name_buf, SMB_MAXNAMELEN,
- qname->name, len,
- server->remote_nls, server->local_nls);
- if (n > 0) {
- qname->len = n;
- qname->name = name_buf;
- }
-
-out:
- return result;
-}
-
-/* findfirst/findnext flags */
-#define SMB_CLOSE_AFTER_FIRST (1<<0)
-#define SMB_CLOSE_IF_END (1<<1)
-#define SMB_REQUIRE_RESUME_KEY (1<<2)
-#define SMB_CONTINUE_BIT (1<<3)
-
-/*
- * Note: samba-2.0.7 (at least) has a very similar routine, cli_list, in
- * source/libsmb/clilist.c. When looking for smb bugs in the readdir code,
- * go there for advise.
- *
- * Bugs Noted:
- * (1) When using Info Level 1 Win NT 4.0 truncates directory listings
- * for certain patterns of names and/or lengths. The breakage pattern
- * is completely reproducible and can be toggled by the creation of a
- * single file. (E.g. echo hi >foo breaks, rm -f foo works.)
- */
-static int
-smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
- struct smb_cache_control *ctl)
-{
- struct dentry *dir = filp->f_path.dentry;
- struct smb_sb_info *server = server_from_dentry(dir);
- struct qstr qname;
- struct smb_fattr fattr;
-
- unsigned char *p, *lastname;
- char *mask, *param;
- __u16 command;
- int first, entries_seen;
-
- /* Both NT and OS/2 accept info level 1 (but see note below). */
- int info_level = 260;
- const int max_matches = 512;
-
- unsigned int ff_searchcount = 0;
- unsigned int ff_eos = 0;
- unsigned int ff_lastname = 0;
- unsigned int ff_dir_handle = 0;
- unsigned int loop_count = 0;
- unsigned int mask_len, i;
- int result;
- struct smb_request *req;
- unsigned char *name_buf;
- static struct qstr star = {
- .name = "*",
- .len = 1,
- };
-
- lock_kernel();
-
- /*
- * We always prefer unix style. Use info level 1 for older
- * servers that don't do 260.
- */
- if (server->opt.capabilities & SMB_CAP_UNIX)
- info_level = SMB_FIND_FILE_UNIX;
- else if (server->opt.protocol < SMB_PROTOCOL_NT1)
- info_level = 1;
-
- result = -ENOMEM;
- if (! (name_buf = kmalloc(SMB_MAXNAMELEN+2, GFP_KERNEL)))
- goto out;
- if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
- goto out_name;
- param = req->rq_buffer;
-
- /*
- * Encode the initial path
- */
- mask = param + 12;
-
- result = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dir, &star);
- if (result <= 0)
- goto out_free;
- mask_len = result - 1; /* mask_len is strlen, not #bytes */
- result = 0;
- first = 1;
- VERBOSE("starting mask_len=%d, mask=%s\n", mask_len, mask);
-
- entries_seen = 2;
- ff_eos = 0;
-
- while (ff_eos == 0) {
- loop_count += 1;
- if (loop_count > 10) {
- printk(KERN_WARNING "smb_proc_readdir_long: "
- "Looping in FIND_NEXT??\n");
- result = -EIO;
- break;
- }
-
- if (first != 0) {
- command = TRANSACT2_FINDFIRST;
- WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
- WSET(param, 2, max_matches); /* max count */
- WSET(param, 4, SMB_CLOSE_IF_END);
- WSET(param, 6, info_level);
- DSET(param, 8, 0);
- } else {
- command = TRANSACT2_FINDNEXT;
-
- VERBOSE("handle=0x%X, lastname=%d, mask=%.*s\n",
- ff_dir_handle, ff_lastname, mask_len, mask);
-
- WSET(param, 0, ff_dir_handle); /* search handle */
- WSET(param, 2, max_matches); /* max count */
- WSET(param, 4, info_level);
- DSET(param, 6, 0);
- WSET(param, 10, SMB_CONTINUE_BIT|SMB_CLOSE_IF_END);
- }
-
- req->rq_trans2_command = command;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = 12 + mask_len + 1;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0) {
- PARANOIA("error=%d, breaking\n", result);
- break;
- }
-
- if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
- /* a damn Win95 bug - sometimes it clags if you
- ask it too fast */
- schedule_timeout_interruptible(msecs_to_jiffies(200));
- continue;
- }
-
- if (req->rq_rcls != 0) {
- result = smb_errno(req);
- PARANOIA("name=%s, result=%d, rcls=%d, err=%d\n",
- mask, result, req->rq_rcls, req->rq_err);
- break;
- }
-
- /* parse out some important return info */
- if (first != 0) {
- ff_dir_handle = WVAL(req->rq_parm, 0);
- ff_searchcount = WVAL(req->rq_parm, 2);
- ff_eos = WVAL(req->rq_parm, 4);
- ff_lastname = WVAL(req->rq_parm, 8);
- } else {
- ff_searchcount = WVAL(req->rq_parm, 0);
- ff_eos = WVAL(req->rq_parm, 2);
- ff_lastname = WVAL(req->rq_parm, 6);
- }
-
- if (ff_searchcount == 0)
- break;
-
- /* Now we are ready to parse smb directory entries. */
-
- /* point to the data bytes */
- p = req->rq_data;
- for (i = 0; i < ff_searchcount; i++) {
- /* make sure we stay within the buffer */
- if (p >= req->rq_data + req->rq_ldata) {
- printk(KERN_ERR "smb_proc_readdir_long: "
- "dirent pointer outside buffer! "
- "%p %d@%p\n",
- p, req->rq_ldata, req->rq_data);
- result = -EIO; /* always a comm. error? */
- goto out_free;
- }
-
- p = smb_decode_long_dirent(server, p, info_level,
- &qname, &fattr, name_buf);
-
- /* ignore . and .. from the server */
- if (entries_seen == 2 && qname.name[0] == '.') {
- if (qname.len == 1)
- continue;
- if (qname.name[1] == '.' && qname.len == 2)
- continue;
- }
-
- if (!smb_fill_cache(filp, dirent, filldir, ctl,
- &qname, &fattr))
- ; /* stop reading? */
- entries_seen++;
- }
-
- VERBOSE("received %d entries, eos=%d\n", ff_searchcount,ff_eos);
-
- /*
- * We might need the lastname for continuations.
- *
- * Note that some servers (win95?) point to the filename and
- * others (NT4, Samba using NT1) to the dir entry. We assume
- * here that those who do not point to a filename do not need
- * this info to continue the listing.
- *
- * OS/2 needs this and talks infolevel 1.
- * NetApps want lastname with infolevel 260.
- * win2k want lastname with infolevel 260, and points to
- * the record not to the name.
- * Samba+CifsUnixExt doesn't need lastname.
- *
- * Both are happy if we return the data they point to. So we do.
- * (FIXME: above is not true with win2k)
- */
- mask_len = 0;
- if (info_level != SMB_FIND_FILE_UNIX &&
- ff_lastname > 0 && ff_lastname < req->rq_ldata) {
- lastname = req->rq_data + ff_lastname;
-
- switch (info_level) {
- case 260:
- mask_len = req->rq_ldata - ff_lastname;
- break;
- case 1:
- /* lastname points to a length byte */
- mask_len = *lastname++;
- if (ff_lastname + 1 + mask_len > req->rq_ldata)
- mask_len = req->rq_ldata - ff_lastname - 1;
- break;
- }
-
- /*
- * Update the mask string for the next message.
- */
- if (mask_len > 255)
- mask_len = 255;
- if (mask_len)
- strncpy(mask, lastname, mask_len);
- }
- mask_len = strnlen(mask, mask_len);
- VERBOSE("new mask, len=%d@%d of %d, mask=%.*s\n",
- mask_len, ff_lastname, req->rq_ldata, mask_len, mask);
-
- first = 0;
- loop_count = 0;
- }
-
-out_free:
- smb_rput(req);
-out_name:
- kfree(name_buf);
-out:
- unlock_kernel();
- return result;
-}
-
-/*
- * This version uses the trans2 TRANSACT2_FINDFIRST message
- * to get the attribute data.
- *
- * Bugs Noted:
- */
-static int
-smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
- struct smb_fattr *fattr)
-{
- char *param, *mask;
- __u16 date, time;
- int mask_len, result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
- mask = param + 12;
-
- mask_len = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dentry,NULL);
- if (mask_len < 0) {
- result = mask_len;
- goto out_free;
- }
- VERBOSE("name=%s, len=%d\n", mask, mask_len);
- WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
- WSET(param, 2, 1); /* max count */
- WSET(param, 4, 1); /* close after this call */
- WSET(param, 6, 1); /* info_level */
- DSET(param, 8, 0);
-
- req->rq_trans2_command = TRANSACT2_FINDFIRST;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = 12 + mask_len;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
- if (req->rq_rcls != 0) {
- result = smb_errno(req);
-#ifdef SMBFS_PARANOIA
- if (result != -ENOENT)
- PARANOIA("error for %s, rcls=%d, err=%d\n",
- mask, req->rq_rcls, req->rq_err);
-#endif
- goto out_free;
- }
- /* Make sure we got enough data ... */
- result = -EINVAL;
- if (req->rq_ldata < 22 || WVAL(req->rq_parm, 2) != 1) {
- PARANOIA("bad result for %s, len=%d, count=%d\n",
- mask, req->rq_ldata, WVAL(req->rq_parm, 2));
- goto out_free;
- }
-
- /*
- * Decode the response into the fattr ...
- */
- date = WVAL(req->rq_data, 0);
- time = WVAL(req->rq_data, 2);
- fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_ctime.tv_nsec = 0;
-
- date = WVAL(req->rq_data, 4);
- time = WVAL(req->rq_data, 6);
- fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_atime.tv_nsec = 0;
-
- date = WVAL(req->rq_data, 8);
- time = WVAL(req->rq_data, 10);
- fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
- fattr->f_mtime.tv_nsec = 0;
- VERBOSE("name=%s, date=%x, time=%x, mtime=%ld\n",
- mask, date, time, fattr->f_mtime.tv_sec);
- fattr->f_size = DVAL(req->rq_data, 12);
- /* ULONG allocation size */
- fattr->attr = WVAL(req->rq_data, 20);
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *fattr)
-{
- int result;
- char *p;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, SMBgetatr, 0, 0);
- result = smb_simple_encode_path(req, &p, dir, NULL);
- if (result < 0)
- goto out_free;
- smb_setup_bcc(req, p);
-
- if ((result = smb_request_ok(req, SMBgetatr, 10, 0)) < 0)
- goto out_free;
- fattr->attr = WVAL(req->rq_header, smb_vwv0);
- fattr->f_mtime.tv_sec = local2utc(server, DVAL(req->rq_header, smb_vwv1));
- fattr->f_mtime.tv_nsec = 0;
- fattr->f_size = DVAL(req->rq_header, smb_vwv3);
- fattr->f_ctime = fattr->f_mtime;
- fattr->f_atime = fattr->f_mtime;
-#ifdef SMBFS_DEBUG_TIMESTAMP
- printk("getattr_core: %s/%s, mtime=%ld\n",
- DENTRY_PATH(dir), fattr->f_mtime);
-#endif
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Bugs Noted:
- * (1) Win 95 swaps the date and time fields in the standard info level.
- */
-static int
-smb_proc_getattr_trans2(struct smb_sb_info *server, struct dentry *dir,
- struct smb_request *req, int infolevel)
-{
- char *p, *param;
- int result;
-
- param = req->rq_buffer;
- WSET(param, 0, infolevel);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
- if (result < 0)
- goto out;
- p = param + 6 + result;
-
- req->rq_trans2_command = TRANSACT2_QPATHINFO;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out;
- if (req->rq_rcls != 0) {
- VERBOSE("for %s: result=%d, rcls=%d, err=%d\n",
- &param[6], result, req->rq_rcls, req->rq_err);
- result = smb_errno(req);
- goto out;
- }
- result = -ENOENT;
- if (req->rq_ldata < 22) {
- PARANOIA("not enough data for %s, len=%d\n",
- &param[6], req->rq_ldata);
- goto out;
- }
-
- result = 0;
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_trans2_std(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *attr)
-{
- u16 date, time;
- int off_date = 0, off_time = 2;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- result = smb_proc_getattr_trans2(server, dir, req, SMB_INFO_STANDARD);
- if (result < 0)
- goto out_free;
-
- /*
- * Kludge alert: Win 95 swaps the date and time field,
- * contrary to the CIFS docs and Win NT practice.
- */
- if (server->mnt->flags & SMB_MOUNT_WIN95) {
- off_date = 2;
- off_time = 0;
- }
- date = WVAL(req->rq_data, off_date);
- time = WVAL(req->rq_data, off_time);
- attr->f_ctime.tv_sec = date_dos2unix(server, date, time);
- attr->f_ctime.tv_nsec = 0;
-
- date = WVAL(req->rq_data, 4 + off_date);
- time = WVAL(req->rq_data, 4 + off_time);
- attr->f_atime.tv_sec = date_dos2unix(server, date, time);
- attr->f_atime.tv_nsec = 0;
-
- date = WVAL(req->rq_data, 8 + off_date);
- time = WVAL(req->rq_data, 8 + off_time);
- attr->f_mtime.tv_sec = date_dos2unix(server, date, time);
- attr->f_mtime.tv_nsec = 0;
-#ifdef SMBFS_DEBUG_TIMESTAMP
- printk(KERN_DEBUG "getattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
- DENTRY_PATH(dir), date, time, attr->f_mtime);
-#endif
- attr->f_size = DVAL(req->rq_data, 12);
- attr->attr = WVAL(req->rq_data, 20);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_trans2_all(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *attr)
-{
- struct smb_request *req;
- int result;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- result = smb_proc_getattr_trans2(server, dir, req,
- SMB_QUERY_FILE_ALL_INFO);
- if (result < 0)
- goto out_free;
-
- attr->f_ctime = smb_ntutc2unixutc(LVAL(req->rq_data, 0));
- attr->f_atime = smb_ntutc2unixutc(LVAL(req->rq_data, 8));
- attr->f_mtime = smb_ntutc2unixutc(LVAL(req->rq_data, 16));
- /* change (24) */
- attr->attr = WVAL(req->rq_data, 32);
- /* pad? (34) */
- /* allocated size (40) */
- attr->f_size = LVAL(req->rq_data, 48);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_unix(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *attr)
-{
- struct smb_request *req;
- int result;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- result = smb_proc_getattr_trans2(server, dir, req,
- SMB_QUERY_FILE_UNIX_BASIC);
- if (result < 0)
- goto out_free;
-
- smb_decode_unix_basic(attr, server, req->rq_data);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_getattr_95(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *attr)
-{
- struct inode *inode = dir->d_inode;
- int result;
-
- /* FIXME: why not use the "all" version? */
- result = smb_proc_getattr_trans2_std(server, dir, attr);
- if (result < 0)
- goto out;
-
- /*
- * None of the getattr versions here can make win9x return the right
- * filesize if there are changes made to an open file.
- * A seek-to-end does return the right size, but we only need to do
- * that on files we have written.
- */
- if (inode && SMB_I(inode)->flags & SMB_F_LOCALWRITE &&
- smb_is_open(inode))
- {
- __u16 fileid = SMB_I(inode)->fileid;
- attr->f_size = smb_proc_seek(server, fileid, 2, 0);
- }
-
-out:
- return result;
-}
-
-static int
-smb_proc_ops_wait(struct smb_sb_info *server)
-{
- int result;
-
- result = wait_event_interruptible_timeout(server->conn_wq,
- server->conn_complete, 30*HZ);
-
- if (!result || signal_pending(current))
- return -EIO;
-
- return 0;
-}
-
-static int
-smb_proc_getattr_null(struct smb_sb_info *server, struct dentry *dir,
- struct smb_fattr *fattr)
-{
- int result;
-
- if (smb_proc_ops_wait(server) < 0)
- return -EIO;
-
- smb_init_dirent(server, fattr);
- result = server->ops->getattr(server, dir, fattr);
- smb_finish_dirent(server, fattr);
-
- return result;
-}
-
-static int
-smb_proc_readdir_null(struct file *filp, void *dirent, filldir_t filldir,
- struct smb_cache_control *ctl)
-{
- struct smb_sb_info *server = server_from_dentry(filp->f_path.dentry);
-
- if (smb_proc_ops_wait(server) < 0)
- return -EIO;
-
- return server->ops->readdir(filp, dirent, filldir, ctl);
-}
-
-int
-smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr)
-{
- struct smb_sb_info *server = server_from_dentry(dir);
- int result;
-
- smb_init_dirent(server, fattr);
- result = server->ops->getattr(server, dir, fattr);
- smb_finish_dirent(server, fattr);
-
- return result;
-}
-
-
-/*
- * Because of bugs in the core protocol, we use this only to set
- * attributes. See smb_proc_settime() below for timestamp handling.
- *
- * Bugs Noted:
- * (1) If mtime is non-zero, both Win 3.1 and Win 95 fail
- * with an undocumented error (ERRDOS code 50). Setting
- * mtime to 0 allows the attributes to be set.
- * (2) The extra parameters following the name string aren't
- * in the CIFS docs, but seem to be necessary for operation.
- */
-static int
-smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
- __u16 attr)
-{
- char *p;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
-
- p = smb_setup_header(req, SMBsetatr, 8, 0);
- WSET(req->rq_header, smb_vwv0, attr);
- DSET(req->rq_header, smb_vwv1, 0); /* mtime */
- WSET(req->rq_header, smb_vwv3, 0); /* reserved values */
- WSET(req->rq_header, smb_vwv4, 0);
- WSET(req->rq_header, smb_vwv5, 0);
- WSET(req->rq_header, smb_vwv6, 0);
- WSET(req->rq_header, smb_vwv7, 0);
- result = smb_simple_encode_path(req, &p, dentry, NULL);
- if (result < 0)
- goto out_free;
- if (p + 2 > (char *)req->rq_buffer + req->rq_bufsize) {
- result = -ENAMETOOLONG;
- goto out_free;
- }
- *p++ = 4;
- *p++ = 0;
- smb_setup_bcc(req, p);
-
- result = smb_request_ok(req, SMBsetatr, 0, 0);
- if (result < 0)
- goto out_free;
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Because of bugs in the trans2 setattr messages, we must set
- * attributes and timestamps separately. The core SMBsetatr
- * message seems to be the only reliable way to set attributes.
- */
-int
-smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr)
-{
- struct smb_sb_info *server = server_from_dentry(dir);
- int result;
-
- VERBOSE("setting %s/%s, open=%d\n",
- DENTRY_PATH(dir), smb_is_open(dir->d_inode));
- result = smb_proc_setattr_core(server, dir, fattr->attr);
- return result;
-}
-
-/*
- * Sets the timestamps for an file open with write permissions.
- */
-static int
-smb_proc_setattr_ext(struct smb_sb_info *server,
- struct inode *inode, struct smb_fattr *fattr)
-{
- __u16 date, time;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBsetattrE, 7, 0);
- WSET(req->rq_header, smb_vwv0, SMB_I(inode)->fileid);
- /* We don't change the creation time */
- WSET(req->rq_header, smb_vwv1, 0);
- WSET(req->rq_header, smb_vwv2, 0);
- date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
- WSET(req->rq_header, smb_vwv3, date);
- WSET(req->rq_header, smb_vwv4, time);
- date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
- WSET(req->rq_header, smb_vwv5, date);
- WSET(req->rq_header, smb_vwv6, time);
-#ifdef SMBFS_DEBUG_TIMESTAMP
- printk(KERN_DEBUG "smb_proc_setattr_ext: date=%d, time=%d, mtime=%ld\n",
- date, time, fattr->f_mtime);
-#endif
-
- req->rq_flags |= SMB_REQ_NORETRY;
- result = smb_request_ok(req, SMBsetattrE, 0, 0);
- if (result < 0)
- goto out_free;
- result = 0;
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Bugs Noted:
- * (1) The TRANSACT2_SETPATHINFO message under Win NT 4.0 doesn't
- * set the file's attribute flags.
- */
-static int
-smb_proc_setattr_trans2(struct smb_sb_info *server,
- struct dentry *dir, struct smb_fattr *fattr)
-{
- __u16 date, time;
- char *p, *param;
- int result;
- char data[26];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- WSET(param, 0, 1); /* Info level SMB_INFO_STANDARD */
- DSET(param, 2, 0);
- result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- WSET(data, 0, 0); /* creation time */
- WSET(data, 2, 0);
- date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
- WSET(data, 4, date);
- WSET(data, 6, time);
- date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
- WSET(data, 8, date);
- WSET(data, 10, time);
-#ifdef SMBFS_DEBUG_TIMESTAMP
- printk(KERN_DEBUG "setattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
- DENTRY_PATH(dir), date, time, fattr->f_mtime);
-#endif
- DSET(data, 12, 0); /* size */
- DSET(data, 16, 0); /* blksize */
- WSET(data, 20, 0); /* attr */
- DSET(data, 22, 0); /* ULONG EA size */
-
- req->rq_trans2_command = TRANSACT2_SETPATHINFO;
- req->rq_ldata = 26;
- req->rq_data = data;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
- result = 0;
- if (req->rq_rcls != 0)
- result = smb_errno(req);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * ATTR_MODE 0x001
- * ATTR_UID 0x002
- * ATTR_GID 0x004
- * ATTR_SIZE 0x008
- * ATTR_ATIME 0x010
- * ATTR_MTIME 0x020
- * ATTR_CTIME 0x040
- * ATTR_ATIME_SET 0x080
- * ATTR_MTIME_SET 0x100
- * ATTR_FORCE 0x200
- * ATTR_ATTR_FLAG 0x400
- *
- * major/minor should only be set by mknod.
- */
-int
-smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
- unsigned int major, unsigned int minor)
-{
- struct smb_sb_info *server = server_from_dentry(d);
- u64 nttime;
- char *p, *param;
- int result;
- char data[100];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- DEBUG1("valid flags = 0x%04x\n", attr->ia_valid);
-
- WSET(param, 0, SMB_SET_FILE_UNIX_BASIC);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- /* 0 L file size in bytes */
- /* 8 L file size on disk in bytes (block count) */
- /* 40 L uid */
- /* 48 L gid */
- /* 56 W file type enum */
- /* 60 L devmajor */
- /* 68 L devminor */
- /* 76 L unique ID (inode) */
- /* 84 L permissions */
- /* 92 L link count */
- LSET(data, 0, SMB_SIZE_NO_CHANGE);
- LSET(data, 8, SMB_SIZE_NO_CHANGE);
- LSET(data, 16, SMB_TIME_NO_CHANGE);
- LSET(data, 24, SMB_TIME_NO_CHANGE);
- LSET(data, 32, SMB_TIME_NO_CHANGE);
- LSET(data, 40, SMB_UID_NO_CHANGE);
- LSET(data, 48, SMB_GID_NO_CHANGE);
- DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
- LSET(data, 60, major);
- LSET(data, 68, minor);
- LSET(data, 76, 0);
- LSET(data, 84, SMB_MODE_NO_CHANGE);
- LSET(data, 92, 0);
-
- if (attr->ia_valid & ATTR_SIZE) {
- LSET(data, 0, attr->ia_size);
- LSET(data, 8, 0); /* can't set anyway */
- }
-
- /*
- * FIXME: check the conversion function it the correct one
- *
- * we can't set ctime but we might as well pass this to the server
- * and let it ignore it.
- */
- if (attr->ia_valid & ATTR_CTIME) {
- nttime = smb_unixutc2ntutc(attr->ia_ctime);
- LSET(data, 16, nttime);
- }
- if (attr->ia_valid & ATTR_ATIME) {
- nttime = smb_unixutc2ntutc(attr->ia_atime);
- LSET(data, 24, nttime);
- }
- if (attr->ia_valid & ATTR_MTIME) {
- nttime = smb_unixutc2ntutc(attr->ia_mtime);
- LSET(data, 32, nttime);
- }
-
- if (attr->ia_valid & ATTR_UID) {
- LSET(data, 40, attr->ia_uid);
- }
- if (attr->ia_valid & ATTR_GID) {
- LSET(data, 48, attr->ia_gid);
- }
-
- if (attr->ia_valid & ATTR_MODE) {
- LSET(data, 84, attr->ia_mode);
- }
-
- req->rq_trans2_command = TRANSACT2_SETPATHINFO;
- req->rq_ldata = 100;
- req->rq_data = data;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-
-/*
- * Set the modify and access timestamps for a file.
- *
- * Incredibly enough, in all of SMB there is no message to allow
- * setting both attributes and timestamps at once.
- *
- * Bugs Noted:
- * (1) Win 95 doesn't support the TRANSACT2_SETFILEINFO message
- * with info level 1 (INFO_STANDARD).
- * (2) Win 95 seems not to support setting directory timestamps.
- * (3) Under the core protocol apparently the only way to set the
- * timestamp is to open and close the file.
- */
-int
-smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr)
-{
- struct smb_sb_info *server = server_from_dentry(dentry);
- struct inode *inode = dentry->d_inode;
- int result;
-
- VERBOSE("setting %s/%s, open=%d\n",
- DENTRY_PATH(dentry), smb_is_open(inode));
-
- /* setting the time on a Win95 server fails (tridge) */
- if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2 &&
- !(server->mnt->flags & SMB_MOUNT_WIN95)) {
- if (smb_is_open(inode) && SMB_I(inode)->access != SMB_O_RDONLY)
- result = smb_proc_setattr_ext(server, inode, fattr);
- else
- result = smb_proc_setattr_trans2(server, dentry, fattr);
- } else {
- /*
- * Fail silently on directories ... timestamp can't be set?
- */
- result = 0;
- if (S_ISREG(inode->i_mode)) {
- /*
- * Set the mtime by opening and closing the file.
- * Note that the file is opened read-only, but this
- * still allows us to set the date (tridge)
- */
- result = -EACCES;
- if (!smb_is_open(inode))
- smb_proc_open(server, dentry, SMB_O_RDONLY);
- if (smb_is_open(inode)) {
- inode->i_mtime = fattr->f_mtime;
- result = smb_proc_close_inode(server, inode);
- }
- }
- }
-
- return result;
-}
-
-int
-smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr)
-{
- struct smb_sb_info *server = SMB_SB(dentry->d_sb);
- int result;
- char *p;
- long unit;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 0)))
- goto out;
-
- smb_setup_header(req, SMBdskattr, 0, 0);
- if ((result = smb_request_ok(req, SMBdskattr, 5, 0)) < 0)
- goto out_free;
- p = SMB_VWV(req->rq_header);
- unit = (WVAL(p, 2) * WVAL(p, 4)) >> SMB_ST_BLKSHIFT;
- attr->f_blocks = WVAL(p, 0) * unit;
- attr->f_bsize = SMB_ST_BLKSIZE;
- attr->f_bavail = attr->f_bfree = WVAL(p, 6) * unit;
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-int
-smb_proc_read_link(struct smb_sb_info *server, struct dentry *d,
- char *buffer, int len)
-{
- char *p, *param;
- int result;
- struct smb_request *req;
-
- DEBUG1("readlink of %s/%s\n", DENTRY_PATH(d));
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- WSET(param, 0, SMB_QUERY_FILE_UNIX_LINK);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- req->rq_trans2_command = TRANSACT2_QPATHINFO;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
- DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
- &param[6], result, req->rq_rcls, req->rq_err);
-
- /* copy data up to the \0 or buffer length */
- result = len;
- if (req->rq_ldata < len)
- result = req->rq_ldata;
- strncpy(buffer, req->rq_data, result);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-
-/*
- * Create a symlink object called dentry which points to oldpath.
- * Samba does not permit dangling links but returns a suitable error message.
- */
-int
-smb_proc_symlink(struct smb_sb_info *server, struct dentry *d,
- const char *oldpath)
-{
- char *p, *param;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- WSET(param, 0, SMB_SET_FILE_UNIX_LINK);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1, d, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- req->rq_trans2_command = TRANSACT2_SETPATHINFO;
- req->rq_ldata = strlen(oldpath) + 1;
- req->rq_data = (char *) oldpath;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
-
- DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
- &param[6], result, req->rq_rcls, req->rq_err);
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-/*
- * Create a hard link object called new_dentry which points to dentry.
- */
-int
-smb_proc_link(struct smb_sb_info *server, struct dentry *dentry,
- struct dentry *new_dentry)
-{
- char *p, *param;
- int result;
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, PAGE_SIZE)))
- goto out;
- param = req->rq_buffer;
-
- WSET(param, 0, SMB_SET_FILE_UNIX_HLINK);
- DSET(param, 2, 0);
- result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1,
- new_dentry, NULL);
- if (result < 0)
- goto out_free;
- p = param + 6 + result;
-
- /* Grr, pointless separation of parameters and data ... */
- req->rq_data = p;
- req->rq_ldata = smb_encode_path(server, p, SMB_MAXPATHLEN+1,
- dentry, NULL);
-
- req->rq_trans2_command = TRANSACT2_SETPATHINFO;
- req->rq_lparm = p - param;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
-
- DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
- &param[6], result, req->rq_rcls, req->rq_err);
- result = 0;
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-static int
-smb_proc_query_cifsunix(struct smb_sb_info *server)
-{
- int result;
- int major, minor;
- u64 caps;
- char param[2];
- struct smb_request *req;
-
- result = -ENOMEM;
- if (! (req = smb_alloc_request(server, 100)))
- goto out;
-
- WSET(param, 0, SMB_QUERY_CIFS_UNIX_INFO);
-
- req->rq_trans2_command = TRANSACT2_QFSINFO;
- req->rq_ldata = 0;
- req->rq_data = NULL;
- req->rq_lparm = 2;
- req->rq_parm = param;
- req->rq_flags = 0;
- result = smb_add_request(req);
- if (result < 0)
- goto out_free;
-
- if (req->rq_ldata < 12) {
- PARANOIA("Not enough data\n");
- goto out_free;
- }
- major = WVAL(req->rq_data, 0);
- minor = WVAL(req->rq_data, 2);
-
- DEBUG1("Server implements CIFS Extensions for UNIX systems v%d.%d\n",
- major, minor);
- /* FIXME: verify that we are ok with this major/minor? */
-
- caps = LVAL(req->rq_data, 4);
- DEBUG1("Server capabilities 0x%016llx\n", caps);
-
-out_free:
- smb_rput(req);
-out:
- return result;
-}
-
-
-static void
-install_ops(struct smb_ops *dst, struct smb_ops *src)
-{
- memcpy(dst, src, sizeof(void *) * SMB_OPS_NUM_STATIC);
-}
-
-/* < LANMAN2 */
-static struct smb_ops smb_ops_core =
-{
- .read = smb_proc_read,
- .write = smb_proc_write,
- .readdir = smb_proc_readdir_short,
- .getattr = smb_proc_getattr_core,
- .truncate = smb_proc_trunc32,
-};
-
-/* LANMAN2, OS/2, others? */
-static struct smb_ops smb_ops_os2 =
-{
- .read = smb_proc_read,
- .write = smb_proc_write,
- .readdir = smb_proc_readdir_long,
- .getattr = smb_proc_getattr_trans2_std,
- .truncate = smb_proc_trunc32,
-};
-
-/* Win95, and possibly some NetApp versions too */
-static struct smb_ops smb_ops_win95 =
-{
- .read = smb_proc_read, /* does not support 12word readX */
- .write = smb_proc_write,
- .readdir = smb_proc_readdir_long,
- .getattr = smb_proc_getattr_95,
- .truncate = smb_proc_trunc95,
-};
-
-/* Samba, NT4 and NT5 */
-static struct smb_ops smb_ops_winNT =
-{
- .read = smb_proc_readX,
- .write = smb_proc_writeX,
- .readdir = smb_proc_readdir_long,
- .getattr = smb_proc_getattr_trans2_all,
- .truncate = smb_proc_trunc64,
-};
-
-/* Samba w/ unix extensions. Others? */
-static struct smb_ops smb_ops_unix =
-{
- .read = smb_proc_readX,
- .write = smb_proc_writeX,
- .readdir = smb_proc_readdir_long,
- .getattr = smb_proc_getattr_unix,
- /* FIXME: core/ext/time setattr needs to be cleaned up! */
- /* .setattr = smb_proc_setattr_unix, */
- .truncate = smb_proc_trunc64,
-};
-
-/* Place holder until real ops are in place */
-static struct smb_ops smb_ops_null =
-{
- .readdir = smb_proc_readdir_null,
- .getattr = smb_proc_getattr_null,
-};
-
-void smb_install_null_ops(struct smb_ops *ops)
-{
- install_ops(ops, &smb_ops_null);
-}
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
deleted file mode 100644
index 05939a6f43e6..000000000000
--- a/fs/smbfs/proto.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Autogenerated with cproto on: Sat Sep 13 17:18:51 CEST 2003
- */
-
-struct smb_request;
-struct sock;
-struct statfs;
-
-/* proc.c */
-extern int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp);
-extern __u32 smb_len(__u8 *p);
-extern int smb_get_rsize(struct smb_sb_info *server);
-extern int smb_get_wsize(struct smb_sb_info *server);
-extern int smb_errno(struct smb_request *req);
-extern int smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt);
-extern __u8 *smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc);
-extern int smb_open(struct dentry *dentry, int wish);
-extern int smb_close(struct inode *ino);
-extern int smb_close_fileid(struct dentry *dentry, __u16 fileid);
-extern int smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid);
-extern int smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry);
-extern int smb_proc_mkdir(struct dentry *dentry);
-extern int smb_proc_rmdir(struct dentry *dentry);
-extern int smb_proc_unlink(struct dentry *dentry);
-extern int smb_proc_flush(struct smb_sb_info *server, __u16 fileid);
-extern void smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
- struct super_block *sb);
-extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr);
-extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr);
-extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor);
-extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr);
-extern int smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr);
-extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len);
-extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath);
-extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
-extern void smb_install_null_ops(struct smb_ops *ops);
-/* dir.c */
-extern const struct file_operations smb_dir_operations;
-extern const struct inode_operations smb_dir_inode_operations;
-extern const struct inode_operations smb_dir_inode_operations_unix;
-extern void smb_new_dentry(struct dentry *dentry);
-extern void smb_renew_times(struct dentry *dentry);
-/* cache.c */
-extern void smb_invalid_dir_cache(struct inode *dir);
-extern void smb_invalidate_dircache_entries(struct dentry *parent);
-extern struct dentry *smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos);
-extern int smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, struct smb_cache_control *ctrl, struct qstr *qname, struct smb_fattr *entry);
-/* sock.c */
-extern void smb_data_ready(struct sock *sk, int len);
-extern int smb_valid_socket(struct inode *inode);
-extern void smb_close_socket(struct smb_sb_info *server);
-extern int smb_recv_available(struct smb_sb_info *server);
-extern int smb_receive_header(struct smb_sb_info *server);
-extern int smb_receive_drop(struct smb_sb_info *server);
-extern int smb_receive(struct smb_sb_info *server, struct smb_request *req);
-extern int smb_send_request(struct smb_request *req);
-/* inode.c */
-extern struct inode *smb_iget(struct super_block *sb, struct smb_fattr *fattr);
-extern void smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr);
-extern void smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr);
-extern void smb_invalidate_inodes(struct smb_sb_info *server);
-extern int smb_revalidate_inode(struct dentry *dentry);
-extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
-extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
-/* file.c */
-extern const struct address_space_operations smb_file_aops;
-extern const struct file_operations smb_file_operations;
-extern const struct inode_operations smb_file_inode_operations;
-/* ioctl.c */
-extern long smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-/* smbiod.c */
-extern void smbiod_wake_up(void);
-extern int smbiod_register_server(struct smb_sb_info *server);
-extern void smbiod_unregister_server(struct smb_sb_info *server);
-extern void smbiod_flush(struct smb_sb_info *server);
-extern int smbiod_retry(struct smb_sb_info *server);
-/* request.c */
-extern int smb_init_request_cache(void);
-extern void smb_destroy_request_cache(void);
-extern struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize);
-extern void smb_rput(struct smb_request *req);
-extern int smb_add_request(struct smb_request *req);
-extern int smb_request_send_server(struct smb_sb_info *server);
-extern int smb_request_recv(struct smb_sb_info *server);
-/* symlink.c */
-extern int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname);
-extern const struct inode_operations smb_link_inode_operations;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
deleted file mode 100644
index 45f45933e862..000000000000
--- a/fs/smbfs/request.c
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * request.c
- *
- * Copyright (C) 2001 by Urban Widmark
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/net.h>
-#include <linux/sched.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include "smb_debug.h"
-#include "request.h"
-#include "proto.h"
-
-/* #define SMB_SLAB_DEBUG (SLAB_RED_ZONE | SLAB_POISON) */
-#define SMB_SLAB_DEBUG 0
-
-/* cache for request structures */
-static struct kmem_cache *req_cachep;
-
-static int smb_request_send_req(struct smb_request *req);
-
-/*
- /proc/slabinfo:
- name, active, num, objsize, active_slabs, num_slaps, #pages
-*/
-
-
-int smb_init_request_cache(void)
-{
- req_cachep = kmem_cache_create("smb_request",
- sizeof(struct smb_request), 0,
- SMB_SLAB_DEBUG | SLAB_HWCACHE_ALIGN,
- NULL);
- if (req_cachep == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
-void smb_destroy_request_cache(void)
-{
- kmem_cache_destroy(req_cachep);
-}
-
-/*
- * Allocate and initialise a request structure
- */
-static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
- int bufsize)
-{
- struct smb_request *req;
- unsigned char *buf = NULL;
-
- req = kmem_cache_zalloc(req_cachep, GFP_KERNEL);
- VERBOSE("allocating request: %p\n", req);
- if (!req)
- goto out;
-
- if (bufsize > 0) {
- buf = kmalloc(bufsize, GFP_NOFS);
- if (!buf) {
- kmem_cache_free(req_cachep, req);
- return NULL;
- }
- }
-
- req->rq_buffer = buf;
- req->rq_bufsize = bufsize;
- req->rq_server = server;
- init_waitqueue_head(&req->rq_wait);
- INIT_LIST_HEAD(&req->rq_queue);
- atomic_set(&req->rq_count, 1);
-
-out:
- return req;
-}
-
-struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize)
-{
- struct smb_request *req = NULL;
-
- for (;;) {
- atomic_inc(&server->nr_requests);
- if (atomic_read(&server->nr_requests) <= MAX_REQUEST_HARD) {
- req = smb_do_alloc_request(server, bufsize);
- if (req != NULL)
- break;
- }
-
-#if 0
- /*
- * Try to free up at least one request in order to stay
- * below the hard limit
- */
- if (nfs_try_to_free_pages(server))
- continue;
-
- if (fatal_signal_pending(current))
- return ERR_PTR(-ERESTARTSYS);
- current->policy = SCHED_YIELD;
- schedule();
-#else
- /* FIXME: we want something like nfs does above, but that
- requires changes to all callers and can wait. */
- break;
-#endif
- }
- return req;
-}
-
-static void smb_free_request(struct smb_request *req)
-{
- atomic_dec(&req->rq_server->nr_requests);
- if (req->rq_buffer && !(req->rq_flags & SMB_REQ_STATIC))
- kfree(req->rq_buffer);
- kfree(req->rq_trans2buffer);
- kmem_cache_free(req_cachep, req);
-}
-
-/*
- * What prevents a rget to race with a rput? The count must never drop to zero
- * while it is in use. Only rput if it is ok that it is free'd.
- */
-static void smb_rget(struct smb_request *req)
-{
- atomic_inc(&req->rq_count);
-}
-void smb_rput(struct smb_request *req)
-{
- if (atomic_dec_and_test(&req->rq_count)) {
- list_del_init(&req->rq_queue);
- smb_free_request(req);
- }
-}
-
-/* setup to receive the data part of the SMB */
-static int smb_setup_bcc(struct smb_request *req)
-{
- int result = 0;
- req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-
- if (req->rq_rlen > req->rq_bufsize) {
- PARANOIA("Packet too large %d > %d\n",
- req->rq_rlen, req->rq_bufsize);
- return -ENOBUFS;
- }
-
- req->rq_iov[0].iov_base = req->rq_buffer;
- req->rq_iov[0].iov_len = req->rq_rlen;
- req->rq_iovlen = 1;
-
- return result;
-}
-
-/*
- * Prepare a "normal" request structure.
- */
-static int smb_setup_request(struct smb_request *req)
-{
- int len = smb_len(req->rq_header) + 4;
- req->rq_slen = len;
-
- /* if we expect a data part in the reply we set the iov's to read it */
- if (req->rq_resp_bcc)
- req->rq_setup_read = smb_setup_bcc;
-
- /* This tries to support re-using the same request */
- req->rq_bytes_sent = 0;
- req->rq_rcls = 0;
- req->rq_err = 0;
- req->rq_errno = 0;
- req->rq_fragment = 0;
- kfree(req->rq_trans2buffer);
- req->rq_trans2buffer = NULL;
-
- return 0;
-}
-
-/*
- * Prepare a transaction2 request structure
- */
-static int smb_setup_trans2request(struct smb_request *req)
-{
- struct smb_sb_info *server = req->rq_server;
- int mparam, mdata;
- static unsigned char padding[4];
-
- /* I know the following is very ugly, but I want to build the
- smb packet as efficiently as possible. */
-
- const int smb_parameters = 15;
- const int header = SMB_HEADER_LEN + 2 * smb_parameters + 2;
- const int oparam = ALIGN(header + 3, sizeof(u32));
- const int odata = ALIGN(oparam + req->rq_lparm, sizeof(u32));
- const int bcc = (req->rq_data ? odata + req->rq_ldata :
- oparam + req->rq_lparm) - header;
-
- if ((bcc + oparam) > server->opt.max_xmit)
- return -ENOMEM;
- smb_setup_header(req, SMBtrans2, smb_parameters, bcc);
-
- /*
- * max parameters + max data + max setup == bufsize to make NT4 happy
- * and not abort the transfer or split into multiple responses. It also
- * makes smbfs happy as handling packets larger than the buffer size
- * is extra work.
- *
- * OS/2 is probably going to hate me for this ...
- */
- mparam = SMB_TRANS2_MAX_PARAM;
- mdata = req->rq_bufsize - mparam;
-
- mdata = server->opt.max_xmit - mparam - 100;
- if (mdata < 1024) {
- mdata = 1024;
- mparam = 20;
- }
-
-#if 0
- /* NT/win2k has ~4k max_xmit, so with this we request more than it wants
- to return as one SMB. Useful for testing the fragmented trans2
- handling. */
- mdata = 8192;
-#endif
-
- WSET(req->rq_header, smb_tpscnt, req->rq_lparm);
- WSET(req->rq_header, smb_tdscnt, req->rq_ldata);
- WSET(req->rq_header, smb_mprcnt, mparam);
- WSET(req->rq_header, smb_mdrcnt, mdata);
- WSET(req->rq_header, smb_msrcnt, 0); /* max setup always 0 ? */
- WSET(req->rq_header, smb_flags, 0);
- DSET(req->rq_header, smb_timeout, 0);
- WSET(req->rq_header, smb_pscnt, req->rq_lparm);
- WSET(req->rq_header, smb_psoff, oparam - 4);
- WSET(req->rq_header, smb_dscnt, req->rq_ldata);
- WSET(req->rq_header, smb_dsoff, req->rq_data ? odata - 4 : 0);
- *(req->rq_header + smb_suwcnt) = 0x01; /* setup count */
- *(req->rq_header + smb_suwcnt + 1) = 0x00; /* reserved */
- WSET(req->rq_header, smb_setup0, req->rq_trans2_command);
-
- req->rq_iovlen = 2;
- req->rq_iov[0].iov_base = (void *) req->rq_header;
- req->rq_iov[0].iov_len = oparam;
- req->rq_iov[1].iov_base = (req->rq_parm==NULL) ? padding : req->rq_parm;
- req->rq_iov[1].iov_len = req->rq_lparm;
- req->rq_slen = oparam + req->rq_lparm;
-
- if (req->rq_data) {
- req->rq_iovlen += 2;
- req->rq_iov[2].iov_base = padding;
- req->rq_iov[2].iov_len = odata - oparam - req->rq_lparm;
- req->rq_iov[3].iov_base = req->rq_data;
- req->rq_iov[3].iov_len = req->rq_ldata;
- req->rq_slen = odata + req->rq_ldata;
- }
-
- /* always a data part for trans2 replies */
- req->rq_setup_read = smb_setup_bcc;
-
- return 0;
-}
-
-/*
- * Add a request and tell smbiod to process it
- */
-int smb_add_request(struct smb_request *req)
-{
- long timeleft;
- struct smb_sb_info *server = req->rq_server;
- int result = 0;
-
- smb_setup_request(req);
- if (req->rq_trans2_command) {
- if (req->rq_buffer == NULL) {
- PARANOIA("trans2 attempted without response buffer!\n");
- return -EIO;
- }
- result = smb_setup_trans2request(req);
- }
- if (result < 0)
- return result;
-
-#ifdef SMB_DEBUG_PACKET_SIZE
- add_xmit_stats(req);
-#endif
-
- /* add 'req' to the queue of requests */
- if (smb_lock_server_interruptible(server))
- return -EINTR;
-
- /*
- * Try to send the request as the process. If that fails we queue the
- * request and let smbiod send it later.
- */
-
- /* FIXME: each server has a number on the maximum number of parallel
- requests. 10, 50 or so. We should not allow more requests to be
- active. */
- if (server->mid > 0xf000)
- server->mid = 0;
- req->rq_mid = server->mid++;
- WSET(req->rq_header, smb_mid, req->rq_mid);
-
- result = 0;
- if (server->state == CONN_VALID) {
- if (list_empty(&server->xmitq))
- result = smb_request_send_req(req);
- if (result < 0) {
- /* Connection lost? */
- server->conn_error = result;
- server->state = CONN_INVALID;
- }
- }
- if (result != 1)
- list_add_tail(&req->rq_queue, &server->xmitq);
- smb_rget(req);
-
- if (server->state != CONN_VALID)
- smbiod_retry(server);
-
- smb_unlock_server(server);
-
- smbiod_wake_up();
-
- timeleft = wait_event_interruptible_timeout(req->rq_wait,
- req->rq_flags & SMB_REQ_RECEIVED, 30*HZ);
- if (!timeleft || signal_pending(current)) {
- /*
- * On timeout or on interrupt we want to try and remove the
- * request from the recvq/xmitq.
- * First check if the request is still part of a queue. (May
- * have been removed by some error condition)
- */
- smb_lock_server(server);
- if (!list_empty(&req->rq_queue)) {
- list_del_init(&req->rq_queue);
- smb_rput(req);
- }
- smb_unlock_server(server);
- }
-
- if (!timeleft) {
- PARANOIA("request [%p, mid=%d] timed out!\n",
- req, req->rq_mid);
- VERBOSE("smb_com: %02x\n", *(req->rq_header + smb_com));
- VERBOSE("smb_rcls: %02x\n", *(req->rq_header + smb_rcls));
- VERBOSE("smb_flg: %02x\n", *(req->rq_header + smb_flg));
- VERBOSE("smb_tid: %04x\n", WVAL(req->rq_header, smb_tid));
- VERBOSE("smb_pid: %04x\n", WVAL(req->rq_header, smb_pid));
- VERBOSE("smb_uid: %04x\n", WVAL(req->rq_header, smb_uid));
- VERBOSE("smb_mid: %04x\n", WVAL(req->rq_header, smb_mid));
- VERBOSE("smb_wct: %02x\n", *(req->rq_header + smb_wct));
-
- req->rq_rcls = ERRSRV;
- req->rq_err = ERRtimeout;
-
- /* Just in case it was "stuck" */
- smbiod_wake_up();
- }
- VERBOSE("woke up, rcls=%d\n", req->rq_rcls);
-
- if (req->rq_rcls != 0)
- req->rq_errno = smb_errno(req);
- if (signal_pending(current))
- req->rq_errno = -ERESTARTSYS;
- return req->rq_errno;
-}
-
-/*
- * Send a request and place it on the recvq if successfully sent.
- * Must be called with the server lock held.
- */
-static int smb_request_send_req(struct smb_request *req)
-{
- struct smb_sb_info *server = req->rq_server;
- int result;
-
- if (req->rq_bytes_sent == 0) {
- WSET(req->rq_header, smb_tid, server->opt.tid);
- WSET(req->rq_header, smb_pid, 1);
- WSET(req->rq_header, smb_uid, server->opt.server_uid);
- }
-
- result = smb_send_request(req);
- if (result < 0 && result != -EAGAIN)
- goto out;
-
- result = 0;
- if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
- goto out;
-
- list_move_tail(&req->rq_queue, &server->recvq);
- result = 1;
-out:
- return result;
-}
-
-/*
- * Sends one request for this server. (smbiod)
- * Must be called with the server lock held.
- * Returns: <0 on error
- * 0 if no request could be completely sent
- * 1 if all data for one request was sent
- */
-int smb_request_send_server(struct smb_sb_info *server)
-{
- struct list_head *head;
- struct smb_request *req;
- int result;
-
- if (server->state != CONN_VALID)
- return 0;
-
- /* dequeue first request, if any */
- req = NULL;
- head = server->xmitq.next;
- if (head != &server->xmitq) {
- req = list_entry(head, struct smb_request, rq_queue);
- }
- if (!req)
- return 0;
-
- result = smb_request_send_req(req);
- if (result < 0) {
- server->conn_error = result;
- list_move(&req->rq_queue, &server->xmitq);
- result = -EIO;
- goto out;
- }
-
-out:
- return result;
-}
-
-/*
- * Try to find a request matching this "mid". Typically the first entry will
- * be the matching one.
- */
-static struct smb_request *find_request(struct smb_sb_info *server, int mid)
-{
- struct list_head *tmp;
- struct smb_request *req = NULL;
-
- list_for_each(tmp, &server->recvq) {
- req = list_entry(tmp, struct smb_request, rq_queue);
- if (req->rq_mid == mid) {
- break;
- }
- req = NULL;
- }
-
- if (!req) {
- VERBOSE("received reply with mid %d but no request!\n",
- WVAL(server->header, smb_mid));
- server->rstate = SMB_RECV_DROP;
- }
-
- return req;
-}
-
-/*
- * Called when we have read the smb header and believe this is a response.
- */
-static int smb_init_request(struct smb_sb_info *server, struct smb_request *req)
-{
- int hdrlen, wct;
-
- memcpy(req->rq_header, server->header, SMB_HEADER_LEN);
-
- wct = *(req->rq_header + smb_wct);
- if (wct > 20) {
- PARANOIA("wct too large, %d > 20\n", wct);
- server->rstate = SMB_RECV_DROP;
- return 0;
- }
-
- req->rq_resp_wct = wct;
- hdrlen = SMB_HEADER_LEN + wct*2 + 2;
- VERBOSE("header length: %d smb_wct: %2d\n", hdrlen, wct);
-
- req->rq_bytes_recvd = SMB_HEADER_LEN;
- req->rq_rlen = hdrlen;
- req->rq_iov[0].iov_base = req->rq_header;
- req->rq_iov[0].iov_len = hdrlen;
- req->rq_iovlen = 1;
- server->rstate = SMB_RECV_PARAM;
-
-#ifdef SMB_DEBUG_PACKET_SIZE
- add_recv_stats(smb_len(server->header));
-#endif
- return 0;
-}
-
-/*
- * Reads the SMB parameters
- */
-static int smb_recv_param(struct smb_sb_info *server, struct smb_request *req)
-{
- int result;
-
- result = smb_receive(server, req);
- if (result < 0)
- return result;
- if (req->rq_bytes_recvd < req->rq_rlen)
- return 0;
-
- VERBOSE("result: %d smb_bcc: %04x\n", result,
- WVAL(req->rq_header, SMB_HEADER_LEN +
- (*(req->rq_header + smb_wct) * 2)));
-
- result = 0;
- req->rq_iov[0].iov_base = NULL;
- req->rq_rlen = 0;
- if (req->rq_callback)
- req->rq_callback(req);
- else if (req->rq_setup_read)
- result = req->rq_setup_read(req);
- if (result < 0) {
- server->rstate = SMB_RECV_DROP;
- return result;
- }
-
- server->rstate = req->rq_rlen > 0 ? SMB_RECV_DATA : SMB_RECV_END;
-
- req->rq_bytes_recvd = 0; // recvd out of the iov
-
- VERBOSE("rlen: %d\n", req->rq_rlen);
- if (req->rq_rlen < 0) {
- PARANOIA("Parameters read beyond end of packet!\n");
- server->rstate = SMB_RECV_END;
- return -EIO;
- }
- return 0;
-}
-
-/*
- * Reads the SMB data
- */
-static int smb_recv_data(struct smb_sb_info *server, struct smb_request *req)
-{
- int result;
-
- result = smb_receive(server, req);
- if (result < 0)
- goto out;
- if (req->rq_bytes_recvd < req->rq_rlen)
- goto out;
- server->rstate = SMB_RECV_END;
-out:
- VERBOSE("result: %d\n", result);
- return result;
-}
-
-/*
- * Receive a transaction2 response
- * Return: 0 if the response has been fully read
- * 1 if there are further "fragments" to read
- * <0 if there is an error
- */
-static int smb_recv_trans2(struct smb_sb_info *server, struct smb_request *req)
-{
- unsigned char *inbuf;
- unsigned int parm_disp, parm_offset, parm_count, parm_tot;
- unsigned int data_disp, data_offset, data_count, data_tot;
- int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
-
- VERBOSE("handling trans2\n");
-
- inbuf = req->rq_header;
- data_tot = WVAL(inbuf, smb_tdrcnt);
- parm_tot = WVAL(inbuf, smb_tprcnt);
- parm_disp = WVAL(inbuf, smb_prdisp);
- parm_offset = WVAL(inbuf, smb_proff);
- parm_count = WVAL(inbuf, smb_prcnt);
- data_disp = WVAL(inbuf, smb_drdisp);
- data_offset = WVAL(inbuf, smb_droff);
- data_count = WVAL(inbuf, smb_drcnt);
-
- /* Modify offset for the split header/buffer we use */
- if (data_count || data_offset) {
- if (unlikely(data_offset < hdrlen))
- goto out_bad_data;
- else
- data_offset -= hdrlen;
- }
- if (parm_count || parm_offset) {
- if (unlikely(parm_offset < hdrlen))
- goto out_bad_parm;
- else
- parm_offset -= hdrlen;
- }
-
- if (parm_count == parm_tot && data_count == data_tot) {
- /*
- * This packet has all the trans2 data.
- *
- * We setup the request so that this will be the common
- * case. It may be a server error to not return a
- * response that fits.
- */
- VERBOSE("single trans2 response "
- "dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
- data_count, parm_count,
- data_offset, parm_offset);
- req->rq_ldata = data_count;
- req->rq_lparm = parm_count;
- req->rq_data = req->rq_buffer + data_offset;
- req->rq_parm = req->rq_buffer + parm_offset;
- if (unlikely(parm_offset + parm_count > req->rq_rlen))
- goto out_bad_parm;
- if (unlikely(data_offset + data_count > req->rq_rlen))
- goto out_bad_data;
- return 0;
- }
-
- VERBOSE("multi trans2 response "
- "frag=%d, dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
- req->rq_fragment,
- data_count, parm_count,
- data_offset, parm_offset);
-
- if (!req->rq_fragment) {
- int buf_len;
-
- /* We got the first trans2 fragment */
- req->rq_fragment = 1;
- req->rq_total_data = data_tot;
- req->rq_total_parm = parm_tot;
- req->rq_ldata = 0;
- req->rq_lparm = 0;
-
- buf_len = data_tot + parm_tot;
- if (buf_len > SMB_MAX_PACKET_SIZE)
- goto out_too_long;
-
- req->rq_trans2bufsize = buf_len;
- req->rq_trans2buffer = kzalloc(buf_len, GFP_NOFS);
- if (!req->rq_trans2buffer)
- goto out_no_mem;
-
- req->rq_parm = req->rq_trans2buffer;
- req->rq_data = req->rq_trans2buffer + parm_tot;
- } else if (unlikely(req->rq_total_data < data_tot ||
- req->rq_total_parm < parm_tot))
- goto out_data_grew;
-
- if (unlikely(parm_disp + parm_count > req->rq_total_parm ||
- parm_offset + parm_count > req->rq_rlen))
- goto out_bad_parm;
- if (unlikely(data_disp + data_count > req->rq_total_data ||
- data_offset + data_count > req->rq_rlen))
- goto out_bad_data;
-
- inbuf = req->rq_buffer;
- memcpy(req->rq_parm + parm_disp, inbuf + parm_offset, parm_count);
- memcpy(req->rq_data + data_disp, inbuf + data_offset, data_count);
-
- req->rq_ldata += data_count;
- req->rq_lparm += parm_count;
-
- /*
- * Check whether we've received all of the data. Note that
- * we use the packet totals -- total lengths might shrink!
- */
- if (req->rq_ldata >= data_tot && req->rq_lparm >= parm_tot) {
- req->rq_ldata = data_tot;
- req->rq_lparm = parm_tot;
- return 0;
- }
- return 1;
-
-out_too_long:
- printk(KERN_ERR "smb_trans2: data/param too long, data=%u, parm=%u\n",
- data_tot, parm_tot);
- goto out_EIO;
-out_no_mem:
- printk(KERN_ERR "smb_trans2: couldn't allocate data area of %d bytes\n",
- req->rq_trans2bufsize);
- req->rq_errno = -ENOMEM;
- goto out;
-out_data_grew:
- printk(KERN_ERR "smb_trans2: data/params grew!\n");
- goto out_EIO;
-out_bad_parm:
- printk(KERN_ERR "smb_trans2: invalid parms, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
- parm_disp, parm_count, parm_tot, parm_offset);
- goto out_EIO;
-out_bad_data:
- printk(KERN_ERR "smb_trans2: invalid data, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
- data_disp, data_count, data_tot, data_offset);
-out_EIO:
- req->rq_errno = -EIO;
-out:
- return req->rq_errno;
-}
-
-/*
- * State machine for receiving responses. We handle the fact that we can't
- * read the full response in one try by having states telling us how much we
- * have read.
- *
- * Must be called with the server lock held (only called from smbiod).
- *
- * Return: <0 on error
- */
-int smb_request_recv(struct smb_sb_info *server)
-{
- struct smb_request *req = NULL;
- int result = 0;
-
- if (smb_recv_available(server) <= 0)
- return 0;
-
- VERBOSE("state: %d\n", server->rstate);
- switch (server->rstate) {
- case SMB_RECV_DROP:
- result = smb_receive_drop(server);
- if (result < 0)
- break;
- if (server->rstate == SMB_RECV_DROP)
- break;
- server->rstate = SMB_RECV_START;
- /* fallthrough */
- case SMB_RECV_START:
- server->smb_read = 0;
- server->rstate = SMB_RECV_HEADER;
- /* fallthrough */
- case SMB_RECV_HEADER:
- result = smb_receive_header(server);
- if (result < 0)
- break;
- if (server->rstate == SMB_RECV_HEADER)
- break;
- if (! (*(server->header + smb_flg) & SMB_FLAGS_REPLY) ) {
- server->rstate = SMB_RECV_REQUEST;
- break;
- }
- if (server->rstate != SMB_RECV_HCOMPLETE)
- break;
- /* fallthrough */
- case SMB_RECV_HCOMPLETE:
- req = find_request(server, WVAL(server->header, smb_mid));
- if (!req)
- break;
- smb_init_request(server, req);
- req->rq_rcls = *(req->rq_header + smb_rcls);
- req->rq_err = WVAL(req->rq_header, smb_err);
- if (server->rstate != SMB_RECV_PARAM)
- break;
- /* fallthrough */
- case SMB_RECV_PARAM:
- if (!req)
- req = find_request(server,WVAL(server->header,smb_mid));
- if (!req)
- break;
- result = smb_recv_param(server, req);
- if (result < 0)
- break;
- if (server->rstate != SMB_RECV_DATA)
- break;
- /* fallthrough */
- case SMB_RECV_DATA:
- if (!req)
- req = find_request(server,WVAL(server->header,smb_mid));
- if (!req)
- break;
- result = smb_recv_data(server, req);
- if (result < 0)
- break;
- break;
-
- /* We should never be called with any of these states */
- case SMB_RECV_END:
- case SMB_RECV_REQUEST:
- BUG();
- }
-
- if (result < 0) {
- /* We saw an error */
- return result;
- }
-
- if (server->rstate != SMB_RECV_END)
- return 0;
-
- result = 0;
- if (req->rq_trans2_command && req->rq_rcls == SUCCESS)
- result = smb_recv_trans2(server, req);
-
- /*
- * Response completely read. Drop any extra bytes sent by the server.
- * (Yes, servers sometimes add extra bytes to responses)
- */
- VERBOSE("smb_len: %d smb_read: %d\n",
- server->smb_len, server->smb_read);
- if (server->smb_read < server->smb_len)
- smb_receive_drop(server);
-
- server->rstate = SMB_RECV_START;
-
- if (!result) {
- list_del_init(&req->rq_queue);
- req->rq_flags |= SMB_REQ_RECEIVED;
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
- return 0;
-}
diff --git a/fs/smbfs/request.h b/fs/smbfs/request.h
deleted file mode 100644
index efb21451e7c9..000000000000
--- a/fs/smbfs/request.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/uio.h>
-#include <linux/wait.h>
-
-struct smb_request {
- struct list_head rq_queue; /* recvq or xmitq for the server */
-
- atomic_t rq_count;
-
- wait_queue_head_t rq_wait;
- int rq_flags;
- int rq_mid; /* multiplex ID, set by request.c */
-
- struct smb_sb_info *rq_server;
-
- /* header + word count + parameter words + byte count */
- unsigned char rq_header[SMB_HEADER_LEN + 20*2 + 2];
-
- int rq_bufsize;
- unsigned char *rq_buffer;
-
- /* FIXME: this is not good enough for merging IO requests. */
- unsigned char *rq_page;
- int rq_rsize;
-
- int rq_resp_wct;
- int rq_resp_bcc;
-
- int rq_rlen;
- int rq_bytes_recvd;
-
- int rq_slen;
- int rq_bytes_sent;
-
- int rq_iovlen;
- struct kvec rq_iov[4];
-
- int (*rq_setup_read) (struct smb_request *);
- void (*rq_callback) (struct smb_request *);
-
- /* ------ trans2 stuff ------ */
-
- u16 rq_trans2_command; /* 0 if not a trans2 request */
- unsigned int rq_ldata;
- unsigned char *rq_data;
- unsigned int rq_lparm;
- unsigned char *rq_parm;
-
- int rq_fragment;
- u32 rq_total_data;
- u32 rq_total_parm;
- int rq_trans2bufsize;
- unsigned char *rq_trans2buffer;
-
- /* ------ response ------ */
-
- unsigned short rq_rcls;
- unsigned short rq_err;
- int rq_errno;
-};
-
-#define SMB_REQ_STATIC 0x0001 /* rq_buffer is static */
-#define SMB_REQ_NORETRY 0x0002 /* request is invalid after retry */
-
-#define SMB_REQ_TRANSMITTED 0x4000 /* all data has been sent */
-#define SMB_REQ_RECEIVED 0x8000 /* reply received, smbiod is done */
-
-#define xSMB_REQ_NOREPLY 0x0004 /* we don't want the reply (if any) */
-#define xSMB_REQ_NORECEIVER 0x0008 /* caller doesn't wait for response */
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
deleted file mode 100644
index fc4b1a5dd755..000000000000
--- a/fs/smbfs/smb_debug.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Defines some debug macros for smbfs.
- */
-
-/* This makes a dentry parent/child name pair. Useful for debugging printk's */
-#define DENTRY_PATH(dentry) \
- (dentry)->d_parent->d_name.name,(dentry)->d_name.name
-
-/*
- * safety checks that should never happen ???
- * these are normally enabled.
- */
-#ifdef SMBFS_PARANOIA
-# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
-#else
-# define PARANOIA(f, a...) do { ; } while(0)
-#endif
-
-/* lots of debug messages */
-#ifdef SMBFS_DEBUG_VERBOSE
-# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
-#else
-# define VERBOSE(f, a...) do { ; } while(0)
-#endif
-
-/*
- * "normal" debug messages, but not with a normal DEBUG define ... way
- * too common name.
- */
-#ifdef SMBFS_DEBUG
-#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
-#else
-#define DEBUG1(f, a...) do { ; } while(0)
-#endif
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
deleted file mode 100644
index 0e39a924f10a..000000000000
--- a/fs/smbfs/smbiod.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * smbiod.c
- *
- * Copyright (C) 2000, Charles Loep / Corel Corp.
- * Copyright (C) 2001, Urban Widmark
- */
-
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/file.h>
-#include <linux/dcache.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/kthread.h>
-#include <net/ip.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include "smb_debug.h"
-#include "request.h"
-#include "proto.h"
-
-enum smbiod_state {
- SMBIOD_DEAD,
- SMBIOD_STARTING,
- SMBIOD_RUNNING,
-};
-
-static enum smbiod_state smbiod_state = SMBIOD_DEAD;
-static struct task_struct *smbiod_thread;
-static DECLARE_WAIT_QUEUE_HEAD(smbiod_wait);
-static LIST_HEAD(smb_servers);
-static DEFINE_SPINLOCK(servers_lock);
-
-#define SMBIOD_DATA_READY (1<<0)
-static unsigned long smbiod_flags;
-
-static int smbiod(void *);
-static int smbiod_start(void);
-
-/*
- * called when there's work for us to do
- */
-void smbiod_wake_up(void)
-{
- if (smbiod_state == SMBIOD_DEAD)
- return;
- set_bit(SMBIOD_DATA_READY, &smbiod_flags);
- wake_up_interruptible(&smbiod_wait);
-}
-
-/*
- * start smbiod if none is running
- */
-static int smbiod_start(void)
-{
- struct task_struct *tsk;
- int err = 0;
-
- if (smbiod_state != SMBIOD_DEAD)
- return 0;
- smbiod_state = SMBIOD_STARTING;
- __module_get(THIS_MODULE);
- spin_unlock(&servers_lock);
- tsk = kthread_run(smbiod, NULL, "smbiod");
- if (IS_ERR(tsk)) {
- err = PTR_ERR(tsk);
- module_put(THIS_MODULE);
- }
-
- spin_lock(&servers_lock);
- if (err < 0) {
- smbiod_state = SMBIOD_DEAD;
- smbiod_thread = NULL;
- } else {
- smbiod_state = SMBIOD_RUNNING;
- smbiod_thread = tsk;
- }
- return err;
-}
-
-/*
- * register a server & start smbiod if necessary
- */
-int smbiod_register_server(struct smb_sb_info *server)
-{
- int ret;
- spin_lock(&servers_lock);
- list_add(&server->entry, &smb_servers);
- VERBOSE("%p\n", server);
- ret = smbiod_start();
- spin_unlock(&servers_lock);
- return ret;
-}
-
-/*
- * Unregister a server
- * Must be called with the server lock held.
- */
-void smbiod_unregister_server(struct smb_sb_info *server)
-{
- spin_lock(&servers_lock);
- list_del_init(&server->entry);
- VERBOSE("%p\n", server);
- spin_unlock(&servers_lock);
-
- smbiod_wake_up();
- smbiod_flush(server);
-}
-
-void smbiod_flush(struct smb_sb_info *server)
-{
- struct list_head *tmp, *n;
- struct smb_request *req;
-
- list_for_each_safe(tmp, n, &server->xmitq) {
- req = list_entry(tmp, struct smb_request, rq_queue);
- req->rq_errno = -EIO;
- list_del_init(&req->rq_queue);
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
- list_for_each_safe(tmp, n, &server->recvq) {
- req = list_entry(tmp, struct smb_request, rq_queue);
- req->rq_errno = -EIO;
- list_del_init(&req->rq_queue);
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
-}
-
-/*
- * Wake up smbmount and make it reconnect to the server.
- * This must be called with the server locked.
- *
- * FIXME: add smbconnect version to this
- */
-int smbiod_retry(struct smb_sb_info *server)
-{
- struct list_head *head;
- struct smb_request *req;
- struct pid *pid = get_pid(server->conn_pid);
- int result = 0;
-
- VERBOSE("state: %d\n", server->state);
- if (server->state == CONN_VALID || server->state == CONN_RETRYING)
- goto out;
-
- smb_invalidate_inodes(server);
-
- /*
- * Some requests are meaningless after a retry, so we abort them.
- * One example are all requests using 'fileid' since the files are
- * closed on retry.
- */
- head = server->xmitq.next;
- while (head != &server->xmitq) {
- req = list_entry(head, struct smb_request, rq_queue);
- head = head->next;
-
- req->rq_bytes_sent = 0;
- if (req->rq_flags & SMB_REQ_NORETRY) {
- VERBOSE("aborting request %p on xmitq\n", req);
- req->rq_errno = -EIO;
- list_del_init(&req->rq_queue);
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
- }
-
- /*
- * FIXME: test the code for retrying request we already sent
- */
- head = server->recvq.next;
- while (head != &server->recvq) {
- req = list_entry(head, struct smb_request, rq_queue);
- head = head->next;
-#if 0
- if (req->rq_flags & SMB_REQ_RETRY) {
- /* must move the request to the xmitq */
- VERBOSE("retrying request %p on recvq\n", req);
- list_move(&req->rq_queue, &server->xmitq);
- continue;
- }
-#endif
-
- VERBOSE("aborting request %p on recvq\n", req);
- /* req->rq_rcls = ???; */ /* FIXME: set smb error code too? */
- req->rq_errno = -EIO;
- list_del_init(&req->rq_queue);
- smb_rput(req);
- wake_up_interruptible(&req->rq_wait);
- }
-
- smb_close_socket(server);
-
- if (!pid) {
- /* FIXME: this is fatal, umount? */
- printk(KERN_ERR "smb_retry: no connection process\n");
- server->state = CONN_RETRIED;
- goto out;
- }
-
- /*
- * Change state so that only one retry per server will be started.
- */
- server->state = CONN_RETRYING;
-
- /*
- * Note: use the "priv" flag, as a user process may need to reconnect.
- */
- result = kill_pid(pid, SIGUSR1, 1);
- if (result) {
- /* FIXME: this is most likely fatal, umount? */
- printk(KERN_ERR "smb_retry: signal failed [%d]\n", result);
- goto out;
- }
- VERBOSE("signalled pid %d\n", pid_nr(pid));
-
- /* FIXME: The retried requests should perhaps get a "time boost". */
-
-out:
- put_pid(pid);
- return result;
-}
-
-/*
- * Currently handles lockingX packets.
- */
-static void smbiod_handle_request(struct smb_sb_info *server)
-{
- PARANOIA("smbiod got a request ... and we don't implement oplocks!\n");
- server->rstate = SMB_RECV_DROP;
-}
-
-/*
- * Do some IO for one server.
- */
-static void smbiod_doio(struct smb_sb_info *server)
-{
- int result;
- int maxwork = 7;
-
- if (server->state != CONN_VALID)
- goto out;
-
- do {
- result = smb_request_recv(server);
- if (result < 0) {
- server->state = CONN_INVALID;
- smbiod_retry(server);
- goto out; /* reconnecting is slow */
- } else if (server->rstate == SMB_RECV_REQUEST)
- smbiod_handle_request(server);
- } while (result > 0 && maxwork-- > 0);
-
- /*
- * If there is more to read then we want to be sure to wake up again.
- */
- if (server->state != CONN_VALID)
- goto out;
- if (smb_recv_available(server) > 0)
- set_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
- do {
- result = smb_request_send_server(server);
- if (result < 0) {
- server->state = CONN_INVALID;
- smbiod_retry(server);
- goto out; /* reconnecting is slow */
- }
- } while (result > 0);
-
- /*
- * If the last request was not sent out we want to wake up again.
- */
- if (!list_empty(&server->xmitq))
- set_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
-out:
- return;
-}
-
-/*
- * smbiod kernel thread
- */
-static int smbiod(void *unused)
-{
- VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid);
-
- for (;;) {
- struct smb_sb_info *server;
- struct list_head *pos, *n;
-
- /* FIXME: Use poll? */
- wait_event_interruptible(smbiod_wait,
- test_bit(SMBIOD_DATA_READY, &smbiod_flags));
- if (signal_pending(current)) {
- spin_lock(&servers_lock);
- smbiod_state = SMBIOD_DEAD;
- spin_unlock(&servers_lock);
- break;
- }
-
- clear_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
- spin_lock(&servers_lock);
- if (list_empty(&smb_servers)) {
- smbiod_state = SMBIOD_DEAD;
- spin_unlock(&servers_lock);
- break;
- }
-
- list_for_each_safe(pos, n, &smb_servers) {
- server = list_entry(pos, struct smb_sb_info, entry);
- VERBOSE("checking server %p\n", server);
-
- if (server->state == CONN_VALID) {
- spin_unlock(&servers_lock);
-
- smb_lock_server(server);
- smbiod_doio(server);
- smb_unlock_server(server);
-
- spin_lock(&servers_lock);
- }
- }
- spin_unlock(&servers_lock);
- }
-
- VERBOSE("SMB Kernel thread exiting (%d) ...\n", current->pid);
- module_put_and_exit(0);
-}
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c
deleted file mode 100644
index e37fe4deebd0..000000000000
--- a/fs/smbfs/sock.c
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
- * sock.c
- *
- * Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- * Copyright (C) 1997 by Volker Lendecke
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/socket.h>
-#include <linux/fcntl.h>
-#include <linux/file.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/mm.h>
-#include <linux/netdevice.h>
-#include <linux/workqueue.h>
-#include <net/scm.h>
-#include <net/tcp_states.h>
-#include <net/ip.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb.h>
-#include <linux/smbno.h>
-
-#include <asm/uaccess.h>
-#include <asm/ioctls.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-#include "request.h"
-
-
-static int
-_recvfrom(struct socket *socket, unsigned char *ubuf, int size, unsigned flags)
-{
- struct kvec iov = {ubuf, size};
- struct msghdr msg = {.msg_flags = flags};
- msg.msg_flags |= MSG_DONTWAIT | MSG_NOSIGNAL;
- return kernel_recvmsg(socket, &msg, &iov, 1, size, msg.msg_flags);
-}
-
-/*
- * Return the server this socket belongs to
- */
-static struct smb_sb_info *
-server_from_socket(struct socket *socket)
-{
- return socket->sk->sk_user_data;
-}
-
-/*
- * Called when there is data on the socket.
- */
-void
-smb_data_ready(struct sock *sk, int len)
-{
- struct smb_sb_info *server = server_from_socket(sk->sk_socket);
- void (*data_ready)(struct sock *, int) = server->data_ready;
-
- data_ready(sk, len);
- VERBOSE("(%p, %d)\n", sk, len);
- smbiod_wake_up();
-}
-
-int
-smb_valid_socket(struct inode * inode)
-{
- return (inode && S_ISSOCK(inode->i_mode) &&
- SOCKET_I(inode)->type == SOCK_STREAM);
-}
-
-static struct socket *
-server_sock(struct smb_sb_info *server)
-{
- struct file *file;
-
- if (server && (file = server->sock_file))
- {
-#ifdef SMBFS_PARANOIA
- if (!smb_valid_socket(file->f_path.dentry->d_inode))
- PARANOIA("bad socket!\n");
-#endif
- return SOCKET_I(file->f_path.dentry->d_inode);
- }
- return NULL;
-}
-
-void
-smb_close_socket(struct smb_sb_info *server)
-{
- struct file * file = server->sock_file;
-
- if (file) {
- struct socket *sock = server_sock(server);
-
- VERBOSE("closing socket %p\n", sock);
- sock->sk->sk_data_ready = server->data_ready;
- server->sock_file = NULL;
- fput(file);
- }
-}
-
-static int
-smb_get_length(struct socket *socket, unsigned char *header)
-{
- int result;
-
- result = _recvfrom(socket, header, 4, MSG_PEEK);
- if (result == -EAGAIN)
- return -ENODATA;
- if (result < 0) {
- PARANOIA("recv error = %d\n", -result);
- return result;
- }
- if (result < 4)
- return -ENODATA;
-
- switch (header[0]) {
- case 0x00:
- case 0x82:
- break;
-
- case 0x85:
- DEBUG1("Got SESSION KEEP ALIVE\n");
- _recvfrom(socket, header, 4, 0); /* read away */
- return -ENODATA;
-
- default:
- PARANOIA("Invalid NBT packet, code=%x\n", header[0]);
- return -EIO;
- }
-
- /* The length in the RFC NB header is the raw data length */
- return smb_len(header);
-}
-
-int
-smb_recv_available(struct smb_sb_info *server)
-{
- mm_segment_t oldfs;
- int avail, err;
- struct socket *sock = server_sock(server);
-
- oldfs = get_fs();
- set_fs(get_ds());
- err = sock->ops->ioctl(sock, SIOCINQ, (unsigned long) &avail);
- set_fs(oldfs);
- return (err >= 0) ? avail : err;
-}
-
-/*
- * Adjust the kvec to move on 'n' bytes (from nfs/sunrpc)
- */
-static int
-smb_move_iov(struct kvec **data, size_t *num, struct kvec *vec, unsigned amount)
-{
- struct kvec *iv = *data;
- int i;
- int len;
-
- /*
- * Eat any sent kvecs
- */
- while (iv->iov_len <= amount) {
- amount -= iv->iov_len;
- iv++;
- (*num)--;
- }
-
- /*
- * And chew down the partial one
- */
- vec[0].iov_len = iv->iov_len-amount;
- vec[0].iov_base =((unsigned char *)iv->iov_base)+amount;
- iv++;
-
- len = vec[0].iov_len;
-
- /*
- * And copy any others
- */
- for (i = 1; i < *num; i++) {
- vec[i] = *iv++;
- len += vec[i].iov_len;
- }
-
- *data = vec;
- return len;
-}
-
-/*
- * smb_receive_header
- * Only called by the smbiod thread.
- */
-int
-smb_receive_header(struct smb_sb_info *server)
-{
- struct socket *sock;
- int result = 0;
- unsigned char peek_buf[4];
-
- result = -EIO;
- sock = server_sock(server);
- if (!sock)
- goto out;
- if (sock->sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- if (!server->smb_read) {
- result = smb_get_length(sock, peek_buf);
- if (result < 0) {
- if (result == -ENODATA)
- result = 0;
- goto out;
- }
- server->smb_len = result + 4;
-
- if (server->smb_len < SMB_HEADER_LEN) {
- PARANOIA("short packet: %d\n", result);
- server->rstate = SMB_RECV_DROP;
- result = -EIO;
- goto out;
- }
- if (server->smb_len > SMB_MAX_PACKET_SIZE) {
- PARANOIA("long packet: %d\n", result);
- server->rstate = SMB_RECV_DROP;
- result = -EIO;
- goto out;
- }
- }
-
- result = _recvfrom(sock, server->header + server->smb_read,
- SMB_HEADER_LEN - server->smb_read, 0);
- VERBOSE("_recvfrom: %d\n", result);
- if (result < 0) {
- VERBOSE("receive error: %d\n", result);
- goto out;
- }
- server->smb_read += result;
-
- if (server->smb_read == SMB_HEADER_LEN)
- server->rstate = SMB_RECV_HCOMPLETE;
-out:
- return result;
-}
-
-static char drop_buffer[PAGE_SIZE];
-
-/*
- * smb_receive_drop - read and throw away the data
- * Only called by the smbiod thread.
- *
- * FIXME: we are in the kernel, could we just tell the socket that we want
- * to drop stuff from the buffer?
- */
-int
-smb_receive_drop(struct smb_sb_info *server)
-{
- struct socket *sock;
- unsigned int flags;
- struct kvec iov;
- struct msghdr msg;
- int rlen = smb_len(server->header) - server->smb_read + 4;
- int result = -EIO;
-
- if (rlen > PAGE_SIZE)
- rlen = PAGE_SIZE;
-
- sock = server_sock(server);
- if (!sock)
- goto out;
- if (sock->sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- flags = MSG_DONTWAIT | MSG_NOSIGNAL;
- iov.iov_base = drop_buffer;
- iov.iov_len = PAGE_SIZE;
- msg.msg_flags = flags;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
-
- result = kernel_recvmsg(sock, &msg, &iov, 1, rlen, flags);
-
- VERBOSE("read: %d\n", result);
- if (result < 0) {
- VERBOSE("receive error: %d\n", result);
- goto out;
- }
- server->smb_read += result;
-
- if (server->smb_read >= server->smb_len)
- server->rstate = SMB_RECV_END;
-
-out:
- return result;
-}
-
-/*
- * smb_receive
- * Only called by the smbiod thread.
- */
-int
-smb_receive(struct smb_sb_info *server, struct smb_request *req)
-{
- struct socket *sock;
- unsigned int flags;
- struct kvec iov[4];
- struct kvec *p = req->rq_iov;
- size_t num = req->rq_iovlen;
- struct msghdr msg;
- int rlen;
- int result = -EIO;
-
- sock = server_sock(server);
- if (!sock)
- goto out;
- if (sock->sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- flags = MSG_DONTWAIT | MSG_NOSIGNAL;
- msg.msg_flags = flags;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
-
- /* Dont repeat bytes and count available bufferspace */
- rlen = min_t(int, smb_move_iov(&p, &num, iov, req->rq_bytes_recvd),
- (req->rq_rlen - req->rq_bytes_recvd));
-
- result = kernel_recvmsg(sock, &msg, p, num, rlen, flags);
-
- VERBOSE("read: %d\n", result);
- if (result < 0) {
- VERBOSE("receive error: %d\n", result);
- goto out;
- }
- req->rq_bytes_recvd += result;
- server->smb_read += result;
-
-out:
- return result;
-}
-
-/*
- * Try to send a SMB request. This may return after sending only parts of the
- * request. SMB_REQ_TRANSMITTED will be set if a request was fully sent.
- *
- * Parts of this was taken from xprt_sendmsg from net/sunrpc/xprt.c
- */
-int
-smb_send_request(struct smb_request *req)
-{
- struct smb_sb_info *server = req->rq_server;
- struct socket *sock;
- struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
- int slen = req->rq_slen - req->rq_bytes_sent;
- int result = -EIO;
- struct kvec iov[4];
- struct kvec *p = req->rq_iov;
- size_t num = req->rq_iovlen;
-
- sock = server_sock(server);
- if (!sock)
- goto out;
- if (sock->sk->sk_state != TCP_ESTABLISHED)
- goto out;
-
- /* Dont repeat bytes */
- if (req->rq_bytes_sent)
- smb_move_iov(&p, &num, iov, req->rq_bytes_sent);
-
- result = kernel_sendmsg(sock, &msg, p, num, slen);
-
- if (result >= 0) {
- req->rq_bytes_sent += result;
- if (req->rq_bytes_sent >= req->rq_slen)
- req->rq_flags |= SMB_REQ_TRANSMITTED;
- }
-out:
- return result;
-}
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
deleted file mode 100644
index 00b2909bd469..000000000000
--- a/fs/smbfs/symlink.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * symlink.c
- *
- * Copyright (C) 2002 by John Newbigin
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/net.h>
-#include <linux/namei.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include <linux/smbno.h>
-#include <linux/smb_fs.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname)
-{
- DEBUG1("create symlink %s -> %s/%s\n", oldname, DENTRY_PATH(dentry));
-
- return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname);
-}
-
-static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- char *link = __getname();
- DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry));
-
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- } else {
- int len = smb_proc_read_link(server_from_dentry(dentry),
- dentry, link, PATH_MAX - 1);
- if (len < 0) {
- __putname(link);
- link = ERR_PTR(len);
- } else {
- link[len] = 0;
- }
- }
- nd_set_link(nd, link);
- return NULL;
-}
-
-static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- __putname(s);
-}
-
-const struct inode_operations smb_link_inode_operations =
-{
- .readlink = generic_readlink,
- .follow_link = smb_follow_link,
- .put_link = smb_put_link,
-};
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 07a4f1156048..24de30ba34c1 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb)
}
-static int squashfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
}
@@ -451,7 +449,7 @@ static void squashfs_destroy_inode(struct inode *inode)
static struct file_system_type squashfs_fs_type = {
.owner = THIS_MODULE,
.name = "squashfs",
- .get_sb = squashfs_get_sb,
+ .mount = squashfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV
};
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 652b8541f9c6..3876c36699a1 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -158,17 +158,18 @@ static int squashfs_xattr_get(struct inode *inode, int name_index,
strncmp(target, name, name_size) == 0) {
/* found xattr */
if (type & SQUASHFS_XATTR_VALUE_OOL) {
- __le64 xattr;
+ __le64 xattr_val;
+ u64 xattr;
/* val is a reference to the real location */
err = squashfs_read_metadata(sb, &val, &start,
&offset, sizeof(val));
if (err < 0)
goto failed;
- err = squashfs_read_metadata(sb, &xattr, &start,
- &offset, sizeof(xattr));
+ err = squashfs_read_metadata(sb, &xattr_val,
+ &start, &offset, sizeof(xattr_val));
if (err < 0)
goto failed;
- xattr = le64_to_cpu(xattr);
+ xattr = le64_to_cpu(xattr_val);
start = SQUASHFS_XATTR_BLK(xattr) +
msblk->xattr_table;
offset = SQUASHFS_XATTR_OFFSET(xattr);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index 49fe0d719fbf..b634efce4bde 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -25,7 +25,7 @@
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
u64 *, int *);
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
- int *, unsigned long long *);
+ unsigned int *, unsigned long long *);
#else
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
u64 start, u64 *xattr_table_start, int *xattr_ids)
@@ -35,7 +35,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
}
static inline int squashfs_xattr_lookup(struct super_block *sb,
- unsigned int index, int *count, int *size,
+ unsigned int index, int *count, unsigned int *size,
unsigned long long *xattr)
{
return 0;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index cfb41106098f..d33be5dd6c32 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -34,6 +34,7 @@
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
+#include "xattr.h"
/*
* Map xattr id using the xattr id look up table
diff --git a/fs/super.c b/fs/super.c
index b9c9869165db..ca696155cd9a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -715,15 +715,14 @@ static int ns_set_super(struct super_block *sb, void *data)
return set_anon_super(sb, NULL);
}
-int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
+struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
+ void *data, int (*fill_super)(struct super_block *, void *, int))
{
struct super_block *sb;
sb = sget(fs_type, ns_test_super, ns_set_super, data);
if (IS_ERR(sb))
- return PTR_ERR(sb);
+ return ERR_CAST(sb);
if (!sb->s_root) {
int err;
@@ -731,17 +730,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
if (err) {
deactivate_locked_super(sb);
- return err;
+ return ERR_PTR(err);
}
sb->s_flags |= MS_ACTIVE;
}
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
}
-EXPORT_SYMBOL(get_sb_ns);
+EXPORT_SYMBOL(mount_ns);
#ifdef CONFIG_BLOCK
static int set_bdev_super(struct super_block *s, void *data)
@@ -762,10 +760,9 @@ static int test_bdev_super(struct super_block *s, void *data)
return (void *)s->s_bdev == data;
}
-int get_sb_bdev(struct file_system_type *fs_type,
+struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
+ int (*fill_super)(struct super_block *, void *, int))
{
struct block_device *bdev;
struct super_block *s;
@@ -777,7 +774,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
bdev = open_bdev_exclusive(dev_name, mode, fs_type);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ return ERR_CAST(bdev);
/*
* once the super is inserted into the list by sget, s_umount
@@ -829,15 +826,30 @@ int get_sb_bdev(struct file_system_type *fs_type,
bdev->bd_super = s;
}
- simple_set_mnt(mnt, s);
- return 0;
+ return dget(s->s_root);
error_s:
error = PTR_ERR(s);
error_bdev:
close_bdev_exclusive(bdev, mode);
error:
- return error;
+ return ERR_PTR(error);
+}
+EXPORT_SYMBOL(mount_bdev);
+
+int get_sb_bdev(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data,
+ int (*fill_super)(struct super_block *, void *, int),
+ struct vfsmount *mnt)
+{
+ struct dentry *root;
+
+ root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ mnt->mnt_root = root;
+ mnt->mnt_sb = root->d_sb;
+ return 0;
}
EXPORT_SYMBOL(get_sb_bdev);
@@ -856,29 +868,42 @@ void kill_block_super(struct super_block *sb)
EXPORT_SYMBOL(kill_block_super);
#endif
-int get_sb_nodev(struct file_system_type *fs_type,
+struct dentry *mount_nodev(struct file_system_type *fs_type,
int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
+ int (*fill_super)(struct super_block *, void *, int))
{
int error;
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
if (IS_ERR(s))
- return PTR_ERR(s);
+ return ERR_CAST(s);
s->s_flags = flags;
error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
if (error) {
deactivate_locked_super(s);
- return error;
+ return ERR_PTR(error);
}
s->s_flags |= MS_ACTIVE;
- simple_set_mnt(mnt, s);
- return 0;
+ return dget(s->s_root);
}
+EXPORT_SYMBOL(mount_nodev);
+int get_sb_nodev(struct file_system_type *fs_type,
+ int flags, void *data,
+ int (*fill_super)(struct super_block *, void *, int),
+ struct vfsmount *mnt)
+{
+ struct dentry *root;
+
+ root = mount_nodev(fs_type, flags, data, fill_super);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ mnt->mnt_root = root;
+ mnt->mnt_sb = root->d_sb;
+ return 0;
+}
EXPORT_SYMBOL(get_sb_nodev);
static int compare_single(struct super_block *s, void *p)
@@ -886,29 +911,42 @@ static int compare_single(struct super_block *s, void *p)
return 1;
}
-int get_sb_single(struct file_system_type *fs_type,
+struct dentry *mount_single(struct file_system_type *fs_type,
int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int),
- struct vfsmount *mnt)
+ int (*fill_super)(struct super_block *, void *, int))
{
struct super_block *s;
int error;
s = sget(fs_type, compare_single, set_anon_super, NULL);
if (IS_ERR(s))
- return PTR_ERR(s);
+ return ERR_CAST(s);
if (!s->s_root) {
s->s_flags = flags;
error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
if (error) {
deactivate_locked_super(s);
- return error;
+ return ERR_PTR(error);
}
s->s_flags |= MS_ACTIVE;
} else {
do_remount_sb(s, flags, data, 0);
}
- simple_set_mnt(mnt, s);
+ return dget(s->s_root);
+}
+EXPORT_SYMBOL(mount_single);
+
+int get_sb_single(struct file_system_type *fs_type,
+ int flags, void *data,
+ int (*fill_super)(struct super_block *, void *, int),
+ struct vfsmount *mnt)
+{
+ struct dentry *root;
+ root = mount_single(fs_type, flags, data, fill_super);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ mnt->mnt_root = root;
+ mnt->mnt_sb = root->d_sb;
return 0;
}
@@ -918,6 +956,7 @@ struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
struct vfsmount *mnt;
+ struct dentry *root;
char *secdata = NULL;
int error;
@@ -942,9 +981,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
goto out_free_secdata;
}
- error = type->get_sb(type, flags, name, data, mnt);
- if (error < 0)
- goto out_free_secdata;
+ if (type->mount) {
+ root = type->mount(type, flags, name, data);
+ if (IS_ERR(root)) {
+ error = PTR_ERR(root);
+ goto out_free_secdata;
+ }
+ mnt->mnt_root = root;
+ mnt->mnt_sb = root->d_sb;
+ } else {
+ error = type->get_sb(type, flags, name, data, mnt);
+ if (error < 0)
+ goto out_free_secdata;
+ }
BUG_ON(!mnt->mnt_sb);
WARN_ON(!mnt->mnt_sb->s_bdi);
mnt->mnt_sb->s_flags |= MS_BORN;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f2af22574c50..266895783b47 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -23,7 +23,7 @@
#include "sysfs.h"
-static struct vfsmount *sysfs_mount;
+static struct vfsmount *sysfs_mnt;
struct kmem_cache *sysfs_dir_cachep;
static const struct super_operations sysfs_ops = {
@@ -95,18 +95,17 @@ static int sysfs_set_super(struct super_block *sb, void *data)
return error;
}
-static int sysfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *sysfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
struct sysfs_super_info *info;
enum kobj_ns_type type;
struct super_block *sb;
int error;
- error = -ENOMEM;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
- goto out;
+ return ERR_PTR(-ENOMEM);
for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
info->ns[type] = kobj_ns_current(type);
@@ -114,24 +113,19 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
if (IS_ERR(sb) || sb->s_fs_info != info)
kfree(info);
- if (IS_ERR(sb)) {
- error = PTR_ERR(sb);
- goto out;
- }
+ if (IS_ERR(sb))
+ return ERR_CAST(sb);
if (!sb->s_root) {
sb->s_flags = flags;
error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
if (error) {
deactivate_locked_super(sb);
- goto out;
+ return ERR_PTR(error);
}
sb->s_flags |= MS_ACTIVE;
}
- simple_set_mnt(mnt, sb);
- error = 0;
-out:
- return error;
+ return dget(sb->s_root);
}
static void sysfs_kill_sb(struct super_block *sb)
@@ -147,7 +141,7 @@ static void sysfs_kill_sb(struct super_block *sb)
static struct file_system_type sysfs_fs_type = {
.name = "sysfs",
- .get_sb = sysfs_get_sb,
+ .mount = sysfs_mount,
.kill_sb = sysfs_kill_sb,
};
@@ -189,11 +183,11 @@ int __init sysfs_init(void)
err = register_filesystem(&sysfs_fs_type);
if (!err) {
- sysfs_mount = kern_mount(&sysfs_fs_type);
- if (IS_ERR(sysfs_mount)) {
+ sysfs_mnt = kern_mount(&sysfs_fs_type);
+ if (IS_ERR(sysfs_mnt)) {
printk(KERN_ERR "sysfs: could not mount!\n");
- err = PTR_ERR(sysfs_mount);
- sysfs_mount = NULL;
+ err = PTR_ERR(sysfs_mnt);
+ sysfs_mnt = NULL;
unregister_filesystem(&sysfs_fs_type);
goto out_err;
}
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a0b0cda6927e..3d9c62be0c10 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -526,23 +526,22 @@ failed:
/* Every kernel module contains stuff like this. */
-static int sysv_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *sysv_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
}
-static int v7_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *v7_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super);
}
static struct file_system_type sysv_fs_type = {
.owner = THIS_MODULE,
.name = "sysv",
- .get_sb = sysv_get_sb,
+ .mount = sysv_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = {
static struct file_system_type v7_fs_type = {
.owner = THIS_MODULE,
.name = "v7",
- .get_sb = v7_get_sb,
+ .mount = v7_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a47c9f0ad07..91fac54c70e3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2038,8 +2038,8 @@ static int sb_test(struct super_block *sb, void *data)
return c->vi.cdev == *dev;
}
-static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
- const char *name, void *data, struct vfsmount *mnt)
+static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
+ const char *name, void *data)
{
struct ubi_volume_desc *ubi;
struct ubi_volume_info vi;
@@ -2057,7 +2057,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
if (IS_ERR(ubi)) {
dbg_err("cannot open \"%s\", error %d",
name, (int)PTR_ERR(ubi));
- return PTR_ERR(ubi);
+ return ERR_CAST(ubi);
}
ubi_get_volume_info(ubi, &vi);
@@ -2095,20 +2095,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
/* 'fill_super()' opens ubi again so we must close it here */
ubi_close_volume(ubi);
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
out_deact:
deactivate_locked_super(sb);
out_close:
ubi_close_volume(ubi);
- return err;
+ return ERR_PTR(err);
}
static struct file_system_type ubifs_fs_type = {
.name = "ubifs",
.owner = THIS_MODULE,
- .get_sb = ubifs_get_sb,
+ .mount = ubifs_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 76f3d6d97b40..4a5c7c61836a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,17 +107,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
}
/* UDF filesystem type */
-static int udf_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *udf_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, udf_fill_super);
}
static struct file_system_type udf_fstype = {
.owner = THIS_MODULE,
.name = "udf",
- .get_sb = udf_get_sb,
+ .mount = udf_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6b9be90dae7d..2c47daed56da 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1454,16 +1454,16 @@ static const struct super_operations ufs_super_ops = {
.show_options = ufs_show_options,
};
-static int ufs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ufs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
}
static struct file_system_type ufs_fs_type = {
.owner = THIS_MODULE,
.name = "ufs",
- .get_sb = ufs_get_sb,
+ .mount = ufs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 480f28127f09..6100ec0fa1d4 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,7 @@ config XFS_FS
config XFS_QUOTA
bool "XFS Quota support"
depends on XFS_FS
+ select QUOTACTL
help
If you say Y here, you will be able to set limits for disk usage on
a per user and/or a per group basis under XFS. XFS considers quota
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c9af48fffcd7..7d287afccde5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1111,11 +1111,12 @@ xfs_vm_writepage(
uptodate = 0;
/*
- * A hole may still be marked uptodate because discard_buffer
- * leaves the flag set.
+ * set_page_dirty dirties all buffers in a page, independent
+ * of their state. The dirty state however is entirely
+ * meaningless for holes (!mapped && uptodate), so skip
+ * buffers covering holes here.
*/
if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
- ASSERT(!buffer_dirty(bh));
imap_valid = 0;
continue;
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 63fd2c07cb57..aa1d353def29 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1781,7 +1781,6 @@ xfs_buf_delwri_split(
INIT_LIST_HEAD(list);
spin_lock(dwlk);
list_for_each_entry_safe(bp, n, dwq, b_list) {
- trace_xfs_buf_delwri_split(bp, _RET_IP_);
ASSERT(bp->b_flags & XBF_DELWRI);
if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1795,6 +1794,7 @@ xfs_buf_delwri_split(
_XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE;
list_move_tail(&bp->b_list, list);
+ trace_xfs_buf_delwri_split(bp, _RET_IP_);
} else
skipped++;
}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 2ea238f6d38e..ad442d9e392e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle(
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+ kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
if (!kbuf)
goto out_dput;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 96107efc0c61..94d5fd6a2973 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -762,7 +762,8 @@ xfs_setup_inode(
inode->i_state = I_NEW;
inode_sb_list_add(inode);
- insert_inode_hash(inode);
+ /* make the inode look hashed for the writeback code */
+ hlist_add_fake(&inode->i_hash);
inode->i_mode = ip->i_d.di_mode;
inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index cf808782c065..064f964d4f3c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -353,9 +353,6 @@ xfs_parseargs(
mp->m_qflags &= ~XFS_OQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
mp->m_flags |= XFS_MOUNT_DELAYLOG;
- cmn_err(CE_WARN,
- "Enabling EXPERIMENTAL delayed logging feature "
- "- use at your own risk.\n");
} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
} else if (!strcmp(this_char, "ihashsize")) {
@@ -1609,16 +1606,14 @@ xfs_fs_fill_super(
goto out_free_sb;
}
-STATIC int
-xfs_fs_get_sb(
+STATIC struct dentry *
+xfs_fs_mount(
struct file_system_type *fs_type,
int flags,
const char *dev_name,
- void *data,
- struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
}
static const struct super_operations xfs_super_operations = {
@@ -1639,7 +1634,7 @@ static const struct super_operations xfs_super_operations = {
static struct file_system_type xfs_fs_type = {
.owner = THIS_MODULE,
.name = "xfs",
- .get_sb = xfs_fs_get_sb,
+ .mount = xfs_fs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 37d33254981d..afb0d7cfad1c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -853,6 +853,7 @@ restart:
if (trylock) {
if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
skipped++;
+ xfs_perag_put(pag);
continue;
}
first_index = pag->pag_ici_reclaim_cursor;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9b715dce5699..9124425b7f2f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -744,9 +744,15 @@ xfs_filestream_new_ag(
* If the file's parent directory is known, take its iolock in exclusive
* mode to prevent two sibling files from racing each other to migrate
* themselves and their parent to different AGs.
+ *
+ * Note that we lock the parent directory iolock inside the child
+ * iolock here. That's fine as we never hold both parent and child
+ * iolock in any other place. This is different from the ilock,
+ * which requires locking of the child after the parent for namespace
+ * operations.
*/
if (pip)
- xfs_ilock(pip, XFS_IOLOCK_EXCL);
+ xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
/*
* A new AG needs to be found for the file. If the file's parent
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b1498ab5a399..19e9dfa1c254 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -275,6 +275,7 @@ xfs_free_perag(
pag = radix_tree_delete(&mp->m_perag_tree, agno);
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
+ ASSERT(atomic_read(&pag->pag_ref) == 0);
call_rcu(&pag->rcu_head, __xfs_free_perag);
}
}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0e64b113bd6..9bb6eda4cd21 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
#define xfs_trans_apply_dquot_deltas(tp)
#define xfs_trans_unreserve_and_mod_dquots(tp)
-#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0)
-#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0)
+static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
+ struct xfs_inode *ip, long nblks, long ninos, uint flags)
+{
+ return 0;
+}
+static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
+ struct xfs_mount *mp, struct xfs_dquot *udqp,
+ struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
+{
+ return 0;
+}
#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
#define xfs_qm_vop_rename_dqattach(it) (0)
#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
#define xfs_qm_dqdetach(ip)
#define xfs_qm_dqrele(d)
#define xfs_qm_statvfs(ip, s)
-#define xfs_qm_sync(mp, fl) (0)
+static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
+{
+ return 0;
+}
#define xfs_qm_newmount(mp, a, b) (0)
#define xfs_qm_mount_quotas(mp)
#define xfs_qm_unmount(mp)
-#define xfs_qm_unmount_quotas(mp) (0)
+#define xfs_qm_unmount_quotas(mp)
#endif /* CONFIG_XFS_QUOTA */
#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \